From 537625233537179cb2e8293b2c0dc9c989363f41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 9 Mar 2025 09:41:42 +0100 Subject: [PATCH 01/11] genirq/msi: Make a few functions static None of these functions are used outside of the MSI core. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250309084110.204054172@linutronix.de --- include/linux/msi.h | 5 ----- kernel/irq/msi.c | 40 +++++++--------------------------------- 2 files changed, 7 insertions(+), 38 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00ea..df2dd6355e1c2 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -81,7 +81,6 @@ struct device_attribute; struct irq_domain; struct irq_affinity_desc; -void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); #else @@ -603,8 +602,6 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid); bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, enum irq_domain_bus_token bus_token); -int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, - unsigned int first, unsigned int last); int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs); @@ -613,8 +610,6 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u const struct irq_affinity_desc *affdesc, union msi_instance_cookie *cookie); -void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, - unsigned int first, unsigned int last); void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index fa92882efdb1f..57e64212eccb9 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -270,16 +270,11 @@ static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl * return ret; } -void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) -{ - *msg = entry->msg; -} - void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { struct msi_desc *entry = irq_get_msi_desc(irq); - __get_cached_msi_msg(entry, msg); + *msg = entry->msg; } EXPORT_SYMBOL_GPL(get_cached_msi_msg); @@ -1352,21 +1347,17 @@ static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) } /** - * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain + * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain * @dev: Pointer to device struct of the device for which the interrupts * are allocated * @domid: Id of the interrupt domain to operate on * @first: First index to allocate (inclusive) * @last: Last index to allocate (inclusive) * - * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own descriptor - * allocation/free. - * * Return: %0 on success or an error code. */ -int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, - unsigned int first, unsigned int last) +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) { struct msi_ctrl ctrl = { .domid = domid, @@ -1374,27 +1365,10 @@ int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, .last = last, .nirqs = last + 1 - first, }; - - return msi_domain_alloc_locked(dev, &ctrl); -} - -/** - * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain - * @dev: Pointer to device struct of the device for which the interrupts - * are allocated - * @domid: Id of the interrupt domain to operate on - * @first: First index to allocate (inclusive) - * @last: Last index to allocate (inclusive) - * - * Return: %0 on success or an error code. - */ -int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, - unsigned int first, unsigned int last) -{ int ret; msi_lock_descs(dev); - ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last); + ret = msi_domain_alloc_locked(dev, &ctrl); msi_unlock_descs(dev); return ret; } @@ -1618,8 +1592,8 @@ static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) * @first: First index to free (inclusive) * @last: Last index to free (inclusive) */ -void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, - unsigned int first, unsigned int last) +static void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) { struct msi_ctrl ctrl = { .domid = domid, From 08549ff3e53b9c7bc55724d660ca733041a8bd5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:38 +0100 Subject: [PATCH 02/11] cleanup: Provide retain_ptr() In cases where an allocation is consumed by another function, the allocation needs to be retained on success or freed on failure. The code pattern is usually: struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL); struct bar *b; ,,, // Initialize f ... if (ret) goto free; ... bar = bar_create(f); if (!bar) { ret = -ENOMEM; goto free; } ... return 0; free: kfree(f); return ret; This prevents using __free(kfree) on @f because there is no canonical way to tell the cleanup code that the allocation should not be freed. Abusing no_free_ptr() by force ignoring the return value is not really a sensible option either. Provide an explicit macro retain_ptr(), which NULLs the cleanup pointer. That makes it easy to analyze and reason about. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250313130321.442025758@linutronix.de --- include/linux/cleanup.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index ec00e3f7af2b3..6537f8dfe1bbb 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -216,6 +216,23 @@ const volatile void * __must_check_fn(const volatile void *val) #define return_ptr(p) return no_free_ptr(p) +/* + * Only for situations where an allocation is handed in to another function + * and consumed by that function on success. + * + * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL); + * + * setup(f); + * if (some_condition) + * return -EINVAL; + * .... + * ret = bar(f); + * if (!ret) + * retain_ptr(f); + * return ret; + */ +#define retain_ptr(p) \ + __get_and_null(p, NULL) /* * DEFINE_CLASS(name, type, exit, init, init_args...): From 5c99e0226eccb11738a30e27454157e63dcf2b52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:39 +0100 Subject: [PATCH 03/11] genirq/msi: Use lock guards for MSI descriptor locking Provide a lock guard for MSI descriptor locking and update the core code accordingly. No functional change intended. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250313130321.506045185@linutronix.de --- include/linux/irqdomain.h | 2 + include/linux/msi.h | 3 ++ kernel/irq/msi.c | 109 ++++++++++++++------------------------ 3 files changed, 45 insertions(+), 69 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e432b6a12a32f..d258aa2fc0227 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -281,6 +281,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T)) + struct irq_domain_chip_generic_info; /** diff --git a/include/linux/msi.h b/include/linux/msi.h index df2dd6355e1c2..3246ca633a5ca 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -227,6 +227,9 @@ int msi_setup_device_data(struct device *dev); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock), + msi_unlock_descs(_T->lock)); + struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 57e64212eccb9..e8a264c1d4fdb 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -443,7 +443,6 @@ EXPORT_SYMBOL_GPL(msi_next_desc); unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index) { struct msi_desc *desc; - unsigned int ret = 0; bool pcimsi = false; struct xarray *xa; @@ -457,7 +456,7 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN) pcimsi = to_pci_dev(dev)->msi_enabled; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); xa = &dev->msi.data->__domains[domid].store; desc = xa_load(xa, pcimsi ? 0 : index); if (desc && desc->irq) { @@ -466,16 +465,12 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne * PCI-MSIX and platform MSI use a descriptor per * interrupt. */ - if (pcimsi) { - if (index < desc->nvec_used) - ret = desc->irq + index; - } else { - ret = desc->irq; - } + if (!pcimsi) + return desc->irq; + if (index < desc->nvec_used) + return desc->irq + index; } - - msi_unlock_descs(dev); - return ret; + return 0; } EXPORT_SYMBOL_GPL(msi_domain_get_virq); @@ -993,9 +988,8 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, void *chip_data) { struct irq_domain *domain, *parent = dev->msi.domain; - struct fwnode_handle *fwnode, *fwnalloced = NULL; - struct msi_domain_template *bundle; const struct msi_parent_ops *pops; + struct fwnode_handle *fwnode; if (!irq_domain_is_msi_parent(parent)) return false; @@ -1003,7 +997,8 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, if (domid >= MSI_MAX_DEVICE_IRQDOMAINS) return false; - bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL); + struct msi_domain_template *bundle __free(kfree) = + bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL); if (!bundle) return false; @@ -1026,41 +1021,36 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, * node as they are not guaranteed to have a fwnode. They are never * looked up and always handled in the context of the device. */ - if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE) - fwnode = dev->fwnode; + struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL; + + if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)) + fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name); else - fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name); + fwnode = dev->fwnode; if (!fwnode) - goto free_bundle; + return false; if (msi_setup_device_data(dev)) - goto free_fwnode; - - msi_lock_descs(dev); + return false; + guard(msi_descs_lock)(dev); if (WARN_ON_ONCE(msi_get_device_domain(dev, domid))) - goto fail; + return false; if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info)) - goto fail; + return false; domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent); if (!domain) - goto fail; + return false; + /* @bundle and @fwnode_alloced are now in use. Prevent cleanup */ + retain_ptr(bundle); + retain_ptr(fwnode_alloced); domain->dev = dev; dev->msi.data->__domains[domid].domain = domain; - msi_unlock_descs(dev); return true; - -fail: - msi_unlock_descs(dev); -free_fwnode: - irq_domain_free_fwnode(fwnalloced); -free_bundle: - kfree(bundle); - return false; } /** @@ -1074,12 +1064,10 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) struct msi_domain_info *info; struct irq_domain *domain; - msi_lock_descs(dev); - + guard(msi_descs_lock)(dev); domain = msi_get_device_domain(dev, domid); - if (!domain || !irq_domain_is_msi_device(domain)) - goto unlock; + return; dev->msi.data->__domains[domid].domain = NULL; info = domain->host_data; @@ -1088,9 +1076,6 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) irq_domain_remove(domain); irq_domain_free_fwnode(fwnode); kfree(container_of(info, struct msi_domain_template, info)); - -unlock: - msi_unlock_descs(dev); } /** @@ -1106,16 +1091,14 @@ bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, { struct msi_domain_info *info; struct irq_domain *domain; - bool ret = false; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); domain = msi_get_device_domain(dev, domid); if (domain && irq_domain_is_msi_device(domain)) { info = domain->host_data; - ret = info->bus_token == bus_token; + return info->bus_token == bus_token; } - msi_unlock_descs(dev); - return ret; + return false; } static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -1365,12 +1348,9 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, .last = last, .nirqs = last + 1 - first, }; - int ret; - msi_lock_descs(dev); - ret = msi_domain_alloc_locked(dev, &ctrl); - msi_unlock_descs(dev); - return ret; + guard(msi_descs_lock)(dev); + return msi_domain_alloc_locked(dev, &ctrl); } EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range); @@ -1474,12 +1454,8 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u const struct irq_affinity_desc *affdesc, union msi_instance_cookie *icookie) { - struct msi_map map; - - msi_lock_descs(dev); - map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie); - msi_unlock_descs(dev); - return map; + guard(msi_descs_lock)(dev); + return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie); } /** @@ -1516,13 +1492,11 @@ int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, icookie.value = ((u64)type << 32) | hwirq; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain)) map.index = -EINVAL; else map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie); - msi_unlock_descs(dev); - return map.index >= 0 ? map.virq : map.index; } @@ -1615,9 +1589,8 @@ static void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int d void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, unsigned int first, unsigned int last) { - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); msi_domain_free_irqs_range_locked(dev, domid, first, last); - msi_unlock_descs(dev); } EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all); @@ -1647,9 +1620,8 @@ void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) */ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) { - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); msi_domain_free_irqs_all_locked(dev, domid); - msi_unlock_descs(dev); } /** @@ -1668,12 +1640,11 @@ void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI)) return; - msi_lock_descs(dev); - if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) { - msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index, - desc->msi_index); - } - msi_unlock_descs(dev); + guard(msi_descs_lock)(dev); + if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) + return; + msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index, + desc->msi_index); } /** From 211ea774889ace9988d28af4f19e865ac712a150 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:41 +0100 Subject: [PATCH 04/11] soc: ti: ti_sci_inta_msi: Switch MSI descriptor locking to guard() Convert the code to use the new guard(msi_descs_lock). No functional change intended. Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Jonathan Cameron Reviewed-by: Dhruva Gole Link: https://lore.kernel.org/all/20250313130321.568379110@linutronix.de --- drivers/soc/ti/ti_sci_inta_msi.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/soc/ti/ti_sci_inta_msi.c b/drivers/soc/ti/ti_sci_inta_msi.c index c363645221573..193266f5e3f96 100644 --- a/drivers/soc/ti/ti_sci_inta_msi.c +++ b/drivers/soc/ti/ti_sci_inta_msi.c @@ -103,19 +103,15 @@ int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, if (ret) return ret; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); nvec = ti_sci_inta_msi_alloc_descs(dev, res); - if (nvec <= 0) { - ret = nvec; - goto unlock; - } + if (nvec <= 0) + return nvec; /* Use alloc ALL as it's unclear whether there are gaps in the indices */ ret = msi_domain_alloc_irqs_all_locked(dev, MSI_DEFAULT_DOMAIN, nvec); if (ret) dev_err(dev, "Failed to allocate IRQs %d\n", ret); -unlock: - msi_unlock_descs(dev); return ret; } EXPORT_SYMBOL_GPL(ti_sci_inta_msi_domain_alloc_irqs); From 5184d8a737d26f532cd039391d7c48a815d48e7d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:43 +0100 Subject: [PATCH 05/11] NTB/msi: Switch MSI descriptor locking to lock guard() Convert the code to use the new guard(msi_descs_lock). No functional change intended. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Reviewed-by: Logan Gunthorpe Acked-by: Dave Jiang Link: https://lore.kernel.org/all/20250313130321.631772601@linutronix.de --- drivers/ntb/msi.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 6295e55ef85e2..368f6d894bba9 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -106,10 +106,10 @@ int ntb_msi_setup_mws(struct ntb_dev *ntb) if (!ntb->msi) return -EINVAL; - msi_lock_descs(&ntb->pdev->dev); - desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED); - addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); - msi_unlock_descs(&ntb->pdev->dev); + scoped_guard (msi_descs_lock, &ntb->pdev->dev) { + desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED); + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); + } for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { peer_widx = ntb_peer_highest_mw_idx(ntb, peer); @@ -289,7 +289,7 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, if (!ntb->msi) return -EINVAL; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); msi_for_each_desc(entry, dev, MSI_DESC_ASSOCIATED) { if (irq_has_action(entry->irq)) continue; @@ -307,17 +307,11 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); if (ret) { devm_free_irq(&ntb->dev, entry->irq, dev_id); - goto unlock; + return ret; } - - ret = entry->irq; - goto unlock; + return entry->irq; } - ret = -ENODEV; - -unlock: - msi_unlock_descs(dev); - return ret; + return -ENODEV; } EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); From 1bc7e262a20a417afafa76a21e56ec15ab759d1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:44 +0100 Subject: [PATCH 06/11] PCI/MSI: Switch to MSI descriptor locking to guard() Convert the code to use the new guard(msi_descs_lock). No functional change intended. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250313130321.695027112@linutronix.de --- drivers/pci/msi/api.c | 6 +-- drivers/pci/msi/msi.c | 120 +++++++++++++++++++++--------------------- 2 files changed, 62 insertions(+), 64 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index b956ce591f964..d89f491afdf05 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -53,10 +53,9 @@ void pci_disable_msi(struct pci_dev *dev) if (!pci_msi_enabled() || !dev || !dev->msi_enabled) return; - msi_lock_descs(&dev->dev); + guard(msi_descs_lock)(&dev->dev); pci_msi_shutdown(dev); pci_free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msi); @@ -196,10 +195,9 @@ void pci_disable_msix(struct pci_dev *dev) if (!pci_msi_enabled() || !dev || !dev->msix_enabled) return; - msi_lock_descs(&dev->dev); + guard(msi_descs_lock)(&dev->dev); pci_msix_shutdown(dev); pci_free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msix); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 2f647cac4cae3..b243510999e49 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -336,41 +336,11 @@ static int msi_verify_entries(struct pci_dev *dev) return !entry ? 0 : -EIO; } -/** - * msi_capability_init - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function - * @nvec: number of interrupts to allocate - * @affd: description of automatic IRQ affinity assignments (may be %NULL) - * - * Setup the MSI capability structure of the device with the requested - * number of interrupts. A return value of zero indicates the successful - * setup of an entry with the new MSI IRQ. A negative return value indicates - * an error, and a positive return value indicates the number of interrupts - * which could have been allocated. - */ -static int msi_capability_init(struct pci_dev *dev, int nvec, - struct irq_affinity *affd) +static int __msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) { - struct irq_affinity_desc *masks = NULL; + int ret = msi_setup_msi_desc(dev, nvec, masks); struct msi_desc *entry, desc; - int ret; - - /* Reject multi-MSI early on irq domain enabled architectures */ - if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) - return 1; - /* - * Disable MSI during setup in the hardware, but mark it enabled - * so that setup code can evaluate it. - */ - pci_msi_set_enable(dev, 0); - dev->msi_enabled = 1; - - if (affd) - masks = irq_create_affinity_masks(nvec, affd); - - msi_lock_descs(&dev->dev); - ret = msi_setup_msi_desc(dev, nvec, masks); if (ret) goto fail; @@ -399,19 +369,48 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, pcibios_free_irq(dev); dev->irq = entry->irq; - goto unlock; - + return 0; err: pci_msi_unmask(&desc, msi_multi_mask(&desc)); pci_free_msi_irqs(dev); fail: dev->msi_enabled = 0; -unlock: - msi_unlock_descs(&dev->dev); - kfree(masks); return ret; } +/** + * msi_capability_init - configure device's MSI capability structure + * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate + * @affd: description of automatic IRQ affinity assignments (may be %NULL) + * + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI IRQ. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec, + struct irq_affinity *affd) +{ + /* Reject multi-MSI early on irq domain enabled architectures */ + if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) + return 1; + + /* + * Disable MSI during setup in the hardware, but mark it enabled + * so that setup code can evaluate it. + */ + pci_msi_set_enable(dev, 0); + dev->msi_enabled = 1; + + struct irq_affinity_desc *masks __free(kfree) = + affd ? irq_create_affinity_masks(nvec, affd) : NULL; + + guard(msi_descs_lock)(&dev->dev); + return __msi_capability_init(dev, nvec, masks); +} + int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd) { @@ -666,40 +665,41 @@ static void msix_mask_all(void __iomem *base, int tsize) writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } -static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd) +static int __msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity_desc *masks) { - struct irq_affinity_desc *masks = NULL; - int ret; - - if (affd) - masks = irq_create_affinity_masks(nvec, affd); + int ret = msix_setup_msi_descs(dev, entries, nvec, masks); - msi_lock_descs(&dev->dev); - ret = msix_setup_msi_descs(dev, entries, nvec, masks); if (ret) - goto out_free; + goto fail; ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); if (ret) - goto out_free; + goto fail; /* Check if all MSI entries honor device restrictions */ ret = msi_verify_entries(dev); if (ret) - goto out_free; + goto fail; msix_update_entries(dev, entries); - goto out_unlock; + return 0; -out_free: +fail: pci_free_msi_irqs(dev); -out_unlock: - msi_unlock_descs(&dev->dev); - kfree(masks); return ret; } +static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity *affd) +{ + struct irq_affinity_desc *masks __free(kfree) = + affd ? irq_create_affinity_masks(nvec, affd) : NULL; + + guard(msi_descs_lock)(&dev->dev); + return __msix_setup_interrupts(dev, entries, nvec, masks); +} + /** * msix_capability_init - configure device's MSI-X capability * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -871,13 +871,13 @@ void __pci_restore_msix_state(struct pci_dev *dev) write_msg = arch_restore_msi_irqs(dev); - msi_lock_descs(&dev->dev); - msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { - if (write_msg) - __pci_write_msi_msg(entry, &entry->msg); - pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); + scoped_guard (msi_descs_lock, &dev->dev) { + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { + if (write_msg) + __pci_write_msi_msg(entry, &entry->msg); + pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); + } } - msi_unlock_descs(&dev->dev); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); } From 50410bad27146d03dcccbc337088550399e687c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:46 +0100 Subject: [PATCH 07/11] PCI: hv: Switch MSI descriptor locking to guard() Convert the code to use the new guard(msi_descs_lock). No functional change intended. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Reviewed-by: Michael Kelley Acked-by: Wei Liu Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250313130321.758905320@linutronix.de --- drivers/pci/controller/pci-hyperv.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6084b38bdda17..715446fd26a51 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3976,24 +3976,18 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg) { struct irq_data *irq_data; struct msi_desc *entry; - int ret = 0; if (!pdev->msi_enabled && !pdev->msix_enabled) return 0; - msi_lock_descs(&pdev->dev); + guard(msi_descs_lock)(&pdev->dev); msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) { irq_data = irq_get_irq_data(entry->irq); - if (WARN_ON_ONCE(!irq_data)) { - ret = -EINVAL; - break; - } - + if (WARN_ON_ONCE(!irq_data)) + return -EINVAL; hv_compose_msi_msg(irq_data, &entry->msg); } - msi_unlock_descs(&pdev->dev); - - return ret; + return 0; } /* From b9db8df4333b895cd5c0b9f99234ac1cebd31bf7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:48 +0100 Subject: [PATCH 08/11] PCI/MSI: Provide a sane mechanism for TPH The PCI/TPH driver fiddles with the MSI-X control word of an active interrupt completely unserialized against concurrent operations issued from the interrupt core. It also brings the PCI/MSI-X internal cached control word out of sync. Provide a function, which has the required serialization and keeps the control word cache in sync. Unfortunately this requires to look up and lock the interrupt descriptor, which should be only done in the interrupt core code. But confining this particular oddity in the PCI/MSI core is the lesser of all evil. A interrupt core implementation would require a larger pile of infrastructure and indirections for dubious value. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250313130321.822790423@linutronix.de --- drivers/pci/msi/msi.c | 47 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 9 +++++++++ 2 files changed, 56 insertions(+) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index b243510999e49..dc78d9d402c3b 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -916,6 +916,53 @@ void pci_free_msi_irqs(struct pci_dev *dev) } } +#ifdef CONFIG_PCIE_TPH +/** + * pci_msix_write_tph_tag - Update the TPH tag for a given MSI-X vector + * @pdev: The PCIe device to update + * @index: The MSI-X index to update + * @tag: The tag to write + * + * Returns: 0 on success, error code on failure + */ +int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + struct msi_desc *msi_desc; + struct irq_desc *irq_desc; + unsigned int virq; + + if (!pdev->msix_enabled) + return -ENXIO; + + guard(msi_descs_lock)(&pdev->dev); + virq = msi_get_virq(&pdev->dev, index); + if (!virq) + return -ENXIO; + /* + * This is a horrible hack, but short of implementing a PCI + * specific interrupt chip callback and a huge pile of + * infrastructure, this is the minor nuissance. It provides the + * protection against concurrent operations on this entry and keeps + * the control word cache in sync. + */ + irq_desc = irq_to_desc(virq); + if (!irq_desc) + return -ENXIO; + + guard(raw_spinlock_irq)(&irq_desc->lock); + msi_desc = irq_data_get_msi_desc(&irq_desc->irq_data); + if (!msi_desc || msi_desc->pci.msi_attrib.is_virtual) + return -ENXIO; + + msi_desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_ST; + msi_desc->pci.msix_ctrl |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); + pci_msix_write_vector_ctrl(msi_desc, msi_desc->pci.msix_ctrl); + /* Flush the write */ + readl(pci_msix_desc_addr(msi_desc)); + return 0; +} +#endif + /* Misc. infrastructure */ struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 01e51db8d285a..2e9cf26a9ee9b 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -989,6 +989,15 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name); void pcim_release_region(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_MSI +int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag); +#else +static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + return -ENODEV; +} +#endif + /* * Config Address for PCI Configuration Mechanism #1 * From 79273d0a40076601b1d37ff60e2ce8305f0a5d0d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:50 +0100 Subject: [PATCH 09/11] PCI/TPH: Replace the broken MSI-X control word update The driver walks the MSI descriptors to test whether a descriptor exists for a given index. That's just abuse of the MSI internals. The same test can be done with a single function call by looking up whether there is a Linux interrupt number assigned at the index. What's worse is that the function is completely unserialized against modifications of the MSI-X control by operations issued from the interrupt core. It also brings the PCI/MSI-X internal cached control word out of sync. Remove the trainwreck and invoke the function provided by the PCI/MSI core to update it. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250313130321.898592817@linutronix.de --- drivers/pci/tph.c | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 07de59ca2ebfa..77fce5e1b830b 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -204,48 +204,6 @@ static u8 get_rp_completer_type(struct pci_dev *pdev) return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); } -/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */ -static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag) -{ -#ifdef CONFIG_PCI_MSI - struct msi_desc *msi_desc = NULL; - void __iomem *vec_ctrl; - u32 val; - int err = 0; - - msi_lock_descs(&pdev->dev); - - /* Find the msi_desc entry with matching msix_idx */ - msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) { - if (msi_desc->msi_index == msix_idx) - break; - } - - if (!msi_desc) { - err = -ENXIO; - goto err_out; - } - - /* Get the vector control register (offset 0xc) pointed by msix_idx */ - vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE; - vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL; - - val = readl(vec_ctrl); - val &= ~PCI_MSIX_ENTRY_CTRL_ST; - val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); - writel(val, vec_ctrl); - - /* Read back to flush the update */ - val = readl(vec_ctrl); - -err_out: - msi_unlock_descs(&pdev->dev); - return err; -#else - return -ENODEV; -#endif -} - /* Write tag to ST table - Return 0 if OK, otherwise -errno */ static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag) { @@ -346,7 +304,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) switch (loc) { case PCI_TPH_LOC_MSIX: - err = write_tag_to_msix(pdev, index, tag); + err = pci_msix_write_tph_tag(pdev, index, tag); break; case PCI_TPH_LOC_CAP: err = write_tag_to_st_table(pdev, index, tag); From fc87dd58d8f9374a703fb0bfae58336e62971db5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:51 +0100 Subject: [PATCH 10/11] scsi: ufs: qcom: Remove the MSI descriptor abuse The driver abuses the MSI descriptors for internal purposes. Aside of core code and MSI providers nothing has to care about their existence. They have been encapsulated with a lot of effort because this kind of abuse caused all sorts of issues including a maintainability nightmare. Rewrite the code so it uses dedicated storage to hand the required information to the interrupt handler. No functional change intended. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250313130321.963504017@linutronix.de --- drivers/ufs/host/ufs-qcom.c | 75 +++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 23b9f6efa0475..2cfa1774944b1 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1782,15 +1782,19 @@ static void ufs_qcom_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) ufshcd_mcq_config_esi(hba, msg); } +struct ufs_qcom_irq { + unsigned int irq; + unsigned int idx; + struct ufs_hba *hba; +}; + static irqreturn_t ufs_qcom_mcq_esi_handler(int irq, void *data) { - struct msi_desc *desc = data; - struct device *dev = msi_desc_to_dev(desc); - struct ufs_hba *hba = dev_get_drvdata(dev); - u32 id = desc->msi_index; - struct ufs_hw_queue *hwq = &hba->uhq[id]; + struct ufs_qcom_irq *qi = data; + struct ufs_hba *hba = qi->hba; + struct ufs_hw_queue *hwq = &hba->uhq[qi->idx]; - ufshcd_mcq_write_cqis(hba, 0x1, id); + ufshcd_mcq_write_cqis(hba, 0x1, qi->idx); ufshcd_mcq_poll_cqe_lock(hba, hwq); return IRQ_HANDLED; @@ -1799,8 +1803,7 @@ static irqreturn_t ufs_qcom_mcq_esi_handler(int irq, void *data) static int ufs_qcom_config_esi(struct ufs_hba *hba) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); - struct msi_desc *desc; - struct msi_desc *failed_desc = NULL; + struct ufs_qcom_irq *qi; int nr_irqs, ret; if (host->esi_enabled) @@ -1811,47 +1814,47 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) * 2. Poll queues do not need ESI. */ nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; + qi = devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL); + if (qi) + return -ENOMEM; + ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs, ufs_qcom_write_msi_msg); if (ret) { dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); - return ret; + goto cleanup; } - msi_lock_descs(hba->dev); - msi_for_each_desc(desc, hba->dev, MSI_DESC_ALL) { - ret = devm_request_irq(hba->dev, desc->irq, - ufs_qcom_mcq_esi_handler, - IRQF_SHARED, "qcom-mcq-esi", desc); + for (int idx = 0; idx < nr_irqs; idx++) { + qi[idx].irq = msi_get_virq(hba->dev, idx); + qi[idx].idx = idx; + qi[idx].hba = hba; + + ret = devm_request_irq(hba->dev, qi[idx].irq, ufs_qcom_mcq_esi_handler, + IRQF_SHARED, "qcom-mcq-esi", qi + idx); if (ret) { dev_err(hba->dev, "%s: Fail to request IRQ for %d, err = %d\n", - __func__, desc->irq, ret); - failed_desc = desc; - break; + __func__, qi[idx].irq, ret); + qi[idx].irq = 0; + goto cleanup; } } - msi_unlock_descs(hba->dev); - if (ret) { - /* Rewind */ - msi_lock_descs(hba->dev); - msi_for_each_desc(desc, hba->dev, MSI_DESC_ALL) { - if (desc == failed_desc) - break; - devm_free_irq(hba->dev, desc->irq, hba); - } - msi_unlock_descs(hba->dev); - platform_device_msi_free_irqs_all(hba->dev); - } else { - if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 && - host->hw_ver.step == 0) - ufshcd_rmwl(hba, ESI_VEC_MASK, - FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), - REG_UFS_CFG3); - ufshcd_mcq_enable_esi(hba); - host->esi_enabled = true; + if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 && + host->hw_ver.step == 0) { + ufshcd_rmwl(hba, ESI_VEC_MASK, + FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), + REG_UFS_CFG3); } + ufshcd_mcq_enable_esi(hba); + host->esi_enabled = true; + return 0; +cleanup: + for (int idx = 0; qi[idx].irq; idx++) + devm_free_irq(hba->dev, qi[idx].irq, hba); + platform_device_msi_free_irqs_all(hba->dev); + devm_kfree(hba->dev, qi); return ret; } From 8327df40592103bcc693a99c8cb478c35c7ec7e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 14:03:53 +0100 Subject: [PATCH 11/11] genirq/msi: Rename msi_[un]lock_descs() Now that all abuse is gone and the legit users are converted to guard(msi_descs_lock), rename the lock functions and document them as internal. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250313130322.027190131@linutronix.de --- include/linux/msi.h | 8 ++++---- kernel/irq/msi.c | 16 ++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 3246ca633a5ca..0c39cafaf604a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -224,11 +224,11 @@ struct msi_dev_domain { int msi_setup_device_data(struct device *dev); -void msi_lock_descs(struct device *dev); -void msi_unlock_descs(struct device *dev); +void __msi_lock_descs(struct device *dev); +void __msi_unlock_descs(struct device *dev); -DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock), - msi_unlock_descs(_T->lock)); +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock), + __msi_unlock_descs(_T->lock)); struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index e8a264c1d4fdb..ad4a85e311606 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -338,26 +338,30 @@ int msi_setup_device_data(struct device *dev) } /** - * msi_lock_descs - Lock the MSI descriptor storage of a device + * __msi_lock_descs - Lock the MSI descriptor storage of a device * @dev: Device to operate on + * + * Internal function for guard(msi_descs_lock). Don't use in code. */ -void msi_lock_descs(struct device *dev) +void __msi_lock_descs(struct device *dev) { mutex_lock(&dev->msi.data->mutex); } -EXPORT_SYMBOL_GPL(msi_lock_descs); +EXPORT_SYMBOL_GPL(__msi_lock_descs); /** - * msi_unlock_descs - Unlock the MSI descriptor storage of a device + * __msi_unlock_descs - Unlock the MSI descriptor storage of a device * @dev: Device to operate on + * + * Internal function for guard(msi_descs_lock). Don't use in code. */ -void msi_unlock_descs(struct device *dev) +void __msi_unlock_descs(struct device *dev) { /* Invalidate the index which was cached by the iterator */ dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; mutex_unlock(&dev->msi.data->mutex); } -EXPORT_SYMBOL_GPL(msi_unlock_descs); +EXPORT_SYMBOL_GPL(__msi_unlock_descs); static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, enum msi_desc_filter filter)