From 66418687ac895717dc2f6ddffe24cf9b74cd0d3e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 10 Oct 2024 10:24:42 -0500 Subject: [PATCH 01/14] kernel/range: Const-ify range_contains parameters range_contains() does not modify the range values. David suggested it is safer to keep those parameters as const.[1] Make range parameters const Link: https://lore.kernel.org/all/20241008161032.GB1609@twin.jikos.cz/ [1] Reviewed-by: Dan Williams Reviewed-by: David Sterba Link: https://patch.msgid.link/20241010-const-range-v1-1-afb6e4bfd8ce@intel.com Signed-off-by: Ira Weiny Signed-off-by: Dave Jiang --- include/linux/range.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7adc..7dc5e835e079a 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,7 +13,8 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } -static inline bool range_contains(struct range *r1, struct range *r2) +static inline bool range_contains(const struct range *r1, + const struct range *r2) { return r1->start <= r2->start && r1->end >= r2->end; } From 7a01213d6c18d97c2f98455bb22c8416f8cca28b Mon Sep 17 00:00:00 2001 From: "Kobayashi,Daisuke" Date: Wed, 2 Oct 2024 10:15:48 +0900 Subject: [PATCH 02/14] cxl/core/regs: Add rcd_pcie_cap initialization Add rcd_pcie_cap and its initialization to cache the offset of cxl1.1 device link status information. By caching it, avoid the walking memory map area to find the offset when output the register value. Given that this solution involves port lookups via cxl_pci_find_port() and multiple exit paths where that reference needs to be dropped, introduce a new put_cxl_root() scope-based-free handler. 
Reviewed-by: Jonathan Cameron Signed-off-by: Kobayashi,Daisuke Reviewed-by: Dan Williams Link: https://patch.msgid.link/20241002011549.408412-2-kobayashi.da-06@fujitsu.com Signed-off-by: Ira Weiny Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 5 ++++ drivers/cxl/core/regs.c | 56 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 9 +++++++ drivers/cxl/pci.c | 25 +++++++++++++----- 4 files changed, 89 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 0c62b4069ba00..800466f96a685 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -89,6 +89,11 @@ resource_size_t __rcrb_to_component(struct device *dev, enum cxl_rcrb which); u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); +#define PCI_RCRB_CAP_LIST_ID_MASK GENMASK(7, 0) +#define PCI_RCRB_CAP_HDR_ID_MASK GENMASK(7, 0) +#define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8) +#define PCI_CAP_EXP_SIZEOF 0x3c + extern struct rw_semaphore cxl_dpa_rwsem; extern struct rw_semaphore cxl_region_rwsem; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e1082e749c69e..1c1c10c8bc7ae 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -506,6 +506,62 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) return offset; } +static resource_size_t cxl_rcrb_to_linkcap(struct device *dev, struct cxl_dport *dport) +{ + resource_size_t rcrb = dport->rcrb.base; + void __iomem *addr; + u32 cap_hdr; + u16 offset; + + if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB")) + return CXL_RESOURCE_NONE; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) { + dev_err(dev, "Failed to map region %pr\n", addr); + release_mem_region(rcrb, SZ_4K); + return CXL_RESOURCE_NONE; + } + + offset = FIELD_GET(PCI_RCRB_CAP_LIST_ID_MASK, readw(addr + PCI_CAPABILITY_LIST)); + cap_hdr = readl(addr + offset); + while ((FIELD_GET(PCI_RCRB_CAP_HDR_ID_MASK, cap_hdr)) != PCI_CAP_ID_EXP) { + offset = FIELD_GET(PCI_RCRB_CAP_HDR_NEXT_MASK, 
cap_hdr); + if (offset == 0 || offset > SZ_4K) { + offset = 0; + break; + } + cap_hdr = readl(addr + offset); + } + + iounmap(addr); + release_mem_region(rcrb, SZ_4K); + if (!offset) + return CXL_RESOURCE_NONE; + + return offset; +} + +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport) +{ + void __iomem *dport_pcie_cap = NULL; + resource_size_t pos; + struct cxl_rcrb_info *ri; + + ri = &dport->rcrb; + pos = cxl_rcrb_to_linkcap(&pdev->dev, dport); + if (pos == CXL_RESOURCE_NONE) + return -ENXIO; + + dport_pcie_cap = devm_cxl_iomap_block(&pdev->dev, + ri->base + pos, + PCI_CAP_EXP_SIZEOF); + dport->regs.rcd_pcie_cap = dport_pcie_cap; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_map_rcd_linkcap, CXL); + resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0d8b810a51f04..1cfe3dd42e2f3 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -235,6 +235,14 @@ struct cxl_regs { struct_group_tagged(cxl_rch_regs, rch_regs, void __iomem *dport_aer; ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); }; struct cxl_reg_map { @@ -304,6 +312,7 @@ int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; resource_size_t cxl_rcd_component_reg_phys(struct device *dev, struct cxl_dport *dport); +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 188412d45e0d2..0a48d29a7df84 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -475,9 +475,9 @@ static bool is_cxl_restricted(struct pci_dev *pdev) } static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map) + struct cxl_register_map *map, + struct 
cxl_dport *dport) { - struct cxl_dport *dport; resource_size_t component_reg_phys; *map = (struct cxl_register_map) { @@ -513,11 +513,24 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, * is an RCH and try to extract the Component Registers from * an RCRB. */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) - rc = cxl_rcrb_get_comp_regs(pdev, map); - - if (rc) + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { return rc; + } return cxl_setup_regs(map); } From c5eaec79fa43e994ec54c11538dc603d60cd0c4e Mon Sep 17 00:00:00 2001 From: "Kobayashi,Daisuke" Date: Wed, 2 Oct 2024 10:15:49 +0900 Subject: [PATCH 03/14] cxl/pci: Add sysfs attribute for CXL 1.1 device link status Add sysfs attribute for CXL 1.1 device link status to the cxl pci device. In CXL1.1, the link status of the device is included in the RCRB mapped to the memory mapped register area. Critically, that arrangement makes the link status and control registers invisible to existing PCI user tooling. Export those registers via sysfs with the expectation that PCI user tooling will alternatively look for these sysfs files when attempting to access to these CXL 1.1 endpoints registers. 
Reviewed-by: Jonathan Cameron Signed-off-by: Kobayashi,Daisuke Reviewed-by: Dan Williams Link: https://patch.msgid.link/20241002011549.408412-3-kobayashi.da-06@fujitsu.com Signed-off-by: Ira Weiny Signed-off-by: Dave Jiang --- drivers/cxl/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0a48d29a7df84..be7021082959f 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -820,6 +820,83 @@ static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds) return 0; } +static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width) +{ + struct cxl_dev_state *cxlds = dev_get_drvdata(dev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *root_dev; + struct cxl_dport *dport; + struct cxl_port *root __free(put_cxl_port) = + cxl_mem_find_port(cxlmd, &dport); + + if (!root) + return -ENXIO; + + root_dev = root->uport_dev; + if (!root_dev) + return -ENXIO; + + guard(device)(root_dev); + if (!root_dev->driver) + return -ENXIO; + + switch (width) { + case 2: + return sysfs_emit(buf, "%#x\n", + readw(dport->regs.rcd_pcie_cap + offset)); + case 4: + return sysfs_emit(buf, "%#x\n", + readl(dport->regs.rcd_pcie_cap + offset)); + default: + return -EINVAL; + } +} + +static ssize_t rcd_link_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32)); +} +static DEVICE_ATTR_RO(rcd_link_cap); + +static ssize_t rcd_link_ctrl_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16)); +} +static DEVICE_ATTR_RO(rcd_link_ctrl); + +static ssize_t rcd_link_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16)); +} +static DEVICE_ATTR_RO(rcd_link_status); + +static struct attribute *cxl_rcd_attrs[] = { + 
&dev_attr_rcd_link_cap.attr, + &dev_attr_rcd_link_ctrl.attr, + &dev_attr_rcd_link_status.attr, + NULL +}; + +static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (is_cxl_restricted(pdev)) + return a->mode; + + return 0; +} + +static struct attribute_group cxl_rcd_group = { + .attrs = cxl_rcd_attrs, + .is_visible = cxl_rcd_visible, +}; +__ATTRIBUTE_GROUPS(cxl_rcd); + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -1029,6 +1106,7 @@ static struct pci_driver cxl_pci_driver = { .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, .err_handler = &cxl_error_handlers, + .dev_groups = cxl_rcd_groups, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, From 9474d586819940f00a98dd98015fe456f9b35452 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 21 Oct 2024 13:04:43 +0800 Subject: [PATCH 04/14] cxl: downgrade a warning message to debug level in cxl_probe_component_regs() In cxl_probe_component_regs() the error message "Couldn't locate the CXL.cache and CXL.mem capability array header." is potentially a false positive error condition. Downgrade the message from error level to debug level by using dev_dbg() to print the message, and the end users won't worry about the message anymore. 
[djbw/iweiny: Fix up changelog] Reported-by: Kelvin Shieh Signed-off-by: Coly Li Cc: Dan Williams Cc: Jonathan Cameron Cc: Alison Schofield Reviewed-by: Dan Williams Link: https://patch.msgid.link/20241021050443.318712-1-colyli@suse.de Signed-off-by: Ira Weiny Signed-off-by: Dave Jiang --- drivers/cxl/core/regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 1c1c10c8bc7ae..429973a2165bb 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -52,7 +52,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { - dev_err(dev, + dev_dbg(dev, "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } From 8e7f07e608864dcf7cabc9c252ca02b6ca9ff0d4 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 25 Oct 2024 19:46:53 -0500 Subject: [PATCH 05/14] test printf: Add very basic struct resource tests The printf tests for struct resource were stubbed out. struct range printing will leverage the struct resource implementation. To prevent regression add some basic sanity tests for struct resource. 
Reviewed-by: Andy Shevchenko Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Tested-by: Fan Ni Acked-by: Petr Mladek Link: https://patch.msgid.link/20241007-dcd-type2-upstream-v4-1-c261ee6eeded@intel.com Tested-by: Petr Mladek Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241025-cxl-pra-v2-1-123a825daba2@intel.com Signed-off-by: Dave Jiang --- lib/test_printf.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/lib/test_printf.c b/lib/test_printf.c index 8448b6d02bd96..5afdf5efc6273 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -386,6 +386,50 @@ kernel_ptr(void) static void __init struct_resource(void) { + struct resource test_resource = { + .start = 0xc0ffee00, + .end = 0xc0ffee00, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xc0ffee00 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xc0ffee, + .end = 0xba5eba11, + .flags = IORESOURCE_MEM, + }; + test("[mem 0x00c0ffee-0xba5eba11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xc0ffee, + .flags = IORESOURCE_MEM, + }; + test("[mem 0xba5eba11-0x00c0ffee flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xba5eca11, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xba5eba11-0xba5eca11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba11, + .end = 0xca10, + .flags = IORESOURCE_IO | + IORESOURCE_DISABLED | + IORESOURCE_UNSET, + }; + + test("[io size 0x1000 disabled]", + "%pR", &test_resource); } static void __init From 3dff66ff8367cd4dabb6a34633e55324c281348a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 25 Oct 2024 19:46:54 -0500 Subject: [PATCH 06/14] Documentation/printf: struct resource add start == end special case The code when printing a struct resource will check for start == end and only print the start value. 
Document this special case. Suggested-by: Petr Mladek Signed-off-by: Ira Weiny Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20241025-cxl-pra-v2-2-123a825daba2@intel.com Signed-off-by: Dave Jiang --- Documentation/core-api/printk-formats.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 14e093da3ccd9..552f51046cf31 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -209,12 +209,17 @@ Struct Resources :: %pr [mem 0x60000000-0x6fffffff flags 0x2200] or + [mem 0x60000000 flags 0x2200] or [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] + [mem 0x0000000060000000 flags 0x2200] %pR [mem 0x60000000-0x6fffffff pref] or + [mem 0x60000000 pref] or [mem 0x0000000060000000-0x000000006fffffff pref] + [mem 0x0000000060000000 pref] For printing struct resources. The ``R`` and ``r`` specifiers result in a -printed resource with (R) or without (r) a decoded flags member. +printed resource with (R) or without (r) a decoded flags member. If start is +equal to end only print the start value. Passed by reference. From 4261974701851630951e9ab31f0de4ade0faea53 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 25 Oct 2024 19:46:55 -0500 Subject: [PATCH 07/14] printf: Add print format (%pra) for struct range The use of struct range in the CXL subsystem is growing. In particular, the addition of Dynamic Capacity devices uses struct range in a number of places which are reported in debug and error messages. To wit requiring the printing of the start/end fields in each print became cumbersome. Dan Williams mentions in [1] that it might be time to have a print specifier for struct range similar to struct resource. A few alternatives were considered including '%par', '%r', and '%pn'. %pra follows that struct range is similar to struct resource (%p[rR]) but needs to be different. 
Based on discussions with Petr and Andy '%pra' was chosen.[2] Andy also suggested to keep the range prints similar to struct resource though combined code. Add hex_range() to handle printing for both pointer types. Finally introduce DEFINE_RANGE() as a parallel to DEFINE_RES_*() and use it in the tests. Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: open list Link: https://lore.kernel.org/all/663922b475e50_d54d72945b@dwillia2-xfh.jf.intel.com.notmuch/ [1] Link: https://lore.kernel.org/all/66cea3bf3332f_f937b29424@iweiny-mobl.notmuch/ [2] Suggested-by: Dan Williams Signed-off-by: Ira Weiny Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20241025-cxl-pra-v2-3-123a825daba2@intel.com Signed-off-by: Dave Jiang --- Documentation/core-api/printk-formats.rst | 13 ++++++ include/linux/range.h | 6 +++ lib/test_printf.c | 17 +++++++ lib/vsprintf.c | 57 ++++++++++++++++++++--- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 552f51046cf31..ecccc0473da9c 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -236,6 +236,19 @@ width of the CPU data path. Passed by reference. +Struct Range +------------ + +:: + + %pra [range 0x0000000060000000-0x000000006fffffff] or + [range 0x0000000060000000] + +For printing struct range. struct range holds an arbitrary range of u64 +values. If start is equal to end only print the start value. + +Passed by reference. 
+ DMA address types dma_addr_t ---------------------------- diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7adc..1358d4b1807a8 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -31,4 +31,10 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); +#define DEFINE_RANGE(_start, _end) \ +(struct range) { \ + .start = (_start), \ + .end = (_end), \ + } + #endif diff --git a/lib/test_printf.c b/lib/test_printf.c index 5afdf5efc6273..59dbe4f9a4cb5 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -432,6 +432,22 @@ struct_resource(void) "%pR", &test_resource); } +static void __init +struct_range(void) +{ + struct range test_range = DEFINE_RANGE(0xc0ffee00ba5eba11, + 0xc0ffee00ba5eba11); + test("[range 0xc0ffee00ba5eba11]", "%pra", &test_range); + + test_range = DEFINE_RANGE(0xc0ffee, 0xba5eba11); + test("[range 0x0000000000c0ffee-0x00000000ba5eba11]", + "%pra", &test_range); + + test_range = DEFINE_RANGE(0xba5eba11, 0xc0ffee); + test("[range 0x00000000ba5eba11-0x0000000000c0ffee]", + "%pra", &test_range); +} + static void __init addr(void) { @@ -807,6 +823,7 @@ test_pointer(void) symbol_ptr(); kernel_ptr(); struct_resource(); + struct_range(); addr(); escaped_str(); hex_string(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c5e2ec9303c5d..6ac02bbb7df14 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1039,6 +1039,20 @@ static const struct printf_spec default_dec04_spec = { .flags = ZEROPAD, }; +static noinline_for_stack +char *hex_range(char *buf, char *end, u64 start_val, u64 end_val, + struct printf_spec spec) +{ + buf = number(buf, end, start_val, spec); + if (start_val == end_val) + return buf; + + if (buf < end) + *buf = '-'; + ++buf; + return number(buf, end, end_val, spec); +} + static noinline_for_stack char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec, const char *fmt) @@ -1115,11 +1129,7 @@ char 
*resource_string(char *buf, char *end, struct resource *res, p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); - } + p = hex_range(p, pend, res->start, res->end, *specp); } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1140,6 +1150,31 @@ char *resource_string(char *buf, char *end, struct resource *res, return string_nocheck(buf, end, sym, spec); } +static noinline_for_stack +char *range_string(char *buf, char *end, const struct range *range, + struct printf_spec spec, const char *fmt) +{ + char sym[sizeof("[range 0x0123456789abcdef-0x0123456789abcdef]")]; + char *p = sym, *pend = sym + sizeof(sym); + + struct printf_spec range_spec = { + .field_width = 2 + 2 * sizeof(range->start), /* 0x + 2 * 8 */ + .flags = SPECIAL | SMALL | ZEROPAD, + .base = 16, + .precision = -1, + }; + + if (check_pointer(&buf, end, range, spec)) + return buf; + + p = string_nocheck(p, pend, "[range ", default_str_spec); + p = hex_range(p, pend, range->start, range->end, range_spec); + *p++ = ']'; + *p = '\0'; + + return string_nocheck(buf, end, sym, spec); +} + static noinline_for_stack char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) @@ -2229,6 +2264,15 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) +{ + if (*fmt == 'r' && fmt[1] == 'a') + return range_string(buf, end, ptr, spec, fmt); + return resource_string(buf, end, ptr, spec, fmt); +} + int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2277,6 +2321,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); * - 'Bb' as above with module build ID (for use in backtraces) 
* - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'ra' For struct ranges, e.g., [range 0x0000000000000000-0x00000000000000ff] * - 'b[l]' For a bitmap, the number of bits is determined by the field * width which must be explicitly specified either as part of the * format string '%32b[l]' or through '%*b[l]', [l] selects @@ -2401,7 +2446,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return symbol_string(buf, end, ptr, spec, fmt); case 'R': case 'r': - return resource_string(buf, end, ptr, spec, fmt); + return resource_or_range(fmt, buf, end, ptr, spec); case 'h': return hex_string(buf, end, ptr, spec, fmt); case 'b': From bdd7c35fc59f391de5a1d93ca7cbc715bf8c015c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 25 Oct 2024 19:46:56 -0500 Subject: [PATCH 08/14] cxl/cdat: Use %pra for dpa range outputs Now that there is a printf specifier for struct range use it to enhance the debug output of CDAT data. 
Reviewed-by: Alison Schofield Signed-off-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Link: https://patch.msgid.link/20241025-cxl-pra-v2-4-123a825daba2@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index ef1621d40f054..438869df241a1 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -247,8 +247,8 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, dpa_perf->dpa_range = dent->dpa_range; dpa_perf->qos_class = dent->qos_class; dev_dbg(dev, - "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", - dent->dpa_range.start, dpa_perf->qos_class, + "DSMAS: dpa: %pra qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", + &dent->dpa_range, dpa_perf->qos_class, dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth, dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth, dent->coord[ACCESS_COORDINATE_CPU].read_latency, @@ -279,8 +279,8 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, range_contains(&pmem_range, &dent->dpa_range)) update_perf_entry(dev, dent, &mds->pmem_perf); else - dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", - dent->dpa_range.start); + dev_dbg(dev, "no partition for dsmas dpa: %pra\n", + &dent->dpa_range); } } From 06cf321aadef17c7b1578369e314193c0e1c7d8e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:19 -0600 Subject: [PATCH 09/14] range: Add range_overlaps() Code to support CXL Dynamic Capacity devices will have extent ranges which need to be compared for intersection not a subset as is being checked in range_contains(). range_overlaps() is defined in btrfs with a different meaning from what is required in the standard range code. Dan Williams pointed this out in [1]. Adjust the btrfs call according to his suggestion there. Then add a generic range_overlaps(). 
Cc: Dan Williams Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: linux-btrfs@vger.kernel.org Link: https://lore.kernel.org/all/65949f79ef908_8dc68294f2@dwillia2-xfh.jf.intel.com.notmuch/ [1] Acked-by: David Sterba Reviewed-by: Davidlohr Bueso Reviewed-by: Johannes Thumshirn Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-1-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- fs/btrfs/ordered-data.c | 10 +++++----- include/linux/range.h | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2104d60c21616..744c3375ee6a8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -111,8 +111,8 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, return NULL; } -static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, - u64 len) +static int btrfs_range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, + u64 len) { if (file_offset + len <= entry->file_offset || entry->file_offset + entry->num_bytes <= file_offset) @@ -985,7 +985,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( while (1) { entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (range_overlaps(entry, file_offset, len)) + if (btrfs_range_overlaps(entry, file_offset, len)) break; if (entry->file_offset >= file_offset + len) { @@ -1114,12 +1114,12 @@ struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range( } if (prev) { entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node); - if (range_overlaps(entry, file_offset, len)) + if (btrfs_range_overlaps(entry, file_offset, len)) goto out; } if (next) { entry = rb_entry(next, struct btrfs_ordered_extent, rb_node); - if (range_overlaps(entry, file_offset, len)) + if (btrfs_range_overlaps(entry, file_offset, len)) goto out; } /* No ordered extent in the range */ diff 
--git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7adc..876cd5355158e 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,11 +13,19 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +/* True if r1 completely contains r2 */ static inline bool range_contains(struct range *r1, struct range *r2) { return r1->start <= r2->start && r1->end >= r2->end; } +/* True if any part of r1 overlaps r2 */ +static inline bool range_overlaps(const struct range *r1, + const struct range *r2) +{ + return r1->start <= r2->end && r1->end >= r2->start; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); From d62e2ed065785c9a7837519067e1307e4a24d2c2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:20 -0600 Subject: [PATCH 10/14] ACPI/CDAT: Add CDAT/DSMAS shared and read only flag values The Coherent Device Attribute Table (CDAT) Device Scoped Memory Affinity Structure (DSMAS) version 1.04 [1] defines flags to indicate if a DPA range is read only and/or shared. Add read only and shareable flag definitions. This change was merged in ACPICA via PR 976.[2] Link: https://uefi.org/sites/default/files/resources/Coherent%20Device%20Attribute%20Table_1.04%20published_0.pdf [1] Link: https://github.com/acpica/acpica/pull/976 [2] Cc: Robert Moore Cc: Len Brown Cc: Rafael J. Wysocki Cc: linux-acpi@vger.kernel.org Cc: acpica-devel@lists.linux.dev Acked-by: Rafael J. 
Wysocki Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-2-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- include/acpi/actbl1.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 199afc2cd122c..387fc821703a8 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -403,6 +403,8 @@ struct acpi_cdat_dsmas { /* Flags for subtable above */ #define ACPI_CDAT_DSMAS_NON_VOLATILE (1 << 2) +#define ACPI_CDAT_DSMAS_SHAREABLE (1 << 3) +#define ACPI_CDAT_DSMAS_READ_ONLY (1 << 6) /* Subtable 1: Device scoped Latency and Bandwidth Information Structure (DSLBIS) */ From f88b3ecc9cc737fc518b7a386d38bb2110712fa2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:21 -0600 Subject: [PATCH 11/14] dax: Document struct dev_dax_range The device DAX structure is being enhanced to track additional DCD information. Specifically the range tuple needs additional parameters. The current range tuple is not fully documented and is large enough to warrant its own definition. Separate the struct dev_dax_range definition and document it prior to adding information for DC. 
Suggested-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-3-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- drivers/dax/dax-private.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 446617b73aeab..0867115aeef2e 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -40,12 +40,30 @@ struct dax_region { struct device *youngest; }; +/** + * struct dax_mapping - device to display mapping range attributes + * @dev: device representing this range + * @range_id: index within dev_dax ranges array + * @id: ida of this mapping + */ struct dax_mapping { struct device dev; int range_id; int id; }; +/** + * struct dev_dax_range - tuple representing a range of memory used by dev_dax + * @pgoff: page offset + * @range: resource-span + * @mapping: reference to the dax_mapping for this range + */ +struct dev_dax_range { + unsigned long pgoff; + struct range range; + struct dax_mapping *mapping; +}; + /** * struct dev_dax - instance data for a subdivision of a dax region, and * data while the device is activated in the driver. 
@@ -58,7 +76,7 @@ struct dax_mapping { * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) * @nr_range: size of @ranges - * @ranges: resource-span + pgoff tuples for the instance + * @ranges: range tuples of memory used */ struct dev_dax { struct dax_region *region; @@ -72,11 +90,7 @@ struct dev_dax { struct dev_pagemap *pgmap; bool memmap_on_memory; int nr_range; - struct dev_dax_range { - unsigned long pgoff; - struct range range; - struct dax_mapping *mapping; - } *ranges; + struct dev_dax_range *ranges; }; /* From 0f6f0d687adcb4747e71f2a797acc9a739d71778 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:22 -0600 Subject: [PATCH 12/14] cxl/pci: Delay event buffer allocation The event buffer does not need to be allocated if something has failed in setting up event irq's. In prep for adjusting event configuration for DCD events move the buffer allocation to the end of the event configuration. Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Li Ming Link: https://lore.kernel.org/all/663922b475e50_d54d72945b@dwillia2-xfh.jf.intel.com.notmuch/ [1] Suggested-by: Dan Williams Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-4-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 188412d45e0d2..295779c433b2a 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -764,10 +764,6 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, return 0; } - rc = cxl_mem_alloc_event_buf(mds); - if (rc) - return rc; - rc = cxl_event_get_int_policy(mds, &policy); if (rc) return rc; @@ -781,6 +777,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, return -EBUSY; } + rc = cxl_mem_alloc_event_buf(mds); + if (rc) + return rc; + rc = cxl_event_irqsetup(mds); if (rc) return 
rc; From 27fcfb416827b9e549d821317a9bd21d1abe6821 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:23 -0600 Subject: [PATCH 13/14] cxl/hdm: Use guard() in cxl_dpa_set_mode() Additional DCD functionality is being added to this call which will be simplified by the use of guard() with the cxl_dpa_rwsem. Convert the function to use guard() prior to adding DCD functionality. Suggested-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-5-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 3df10517a3278..463ba2669cea5 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -424,7 +424,6 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; - int rc; switch (mode) { case CXL_DECODER_RAM: @@ -435,11 +434,9 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, return -EINVAL; } - down_write(&cxl_dpa_rwsem); - if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { - rc = -EBUSY; - goto out; - } + guard(rwsem_write)(&cxl_dpa_rwsem); + if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) + return -EBUSY; /* * Only allow modes that are supported by the current partition @@ -447,21 +444,15 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, */ if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) { dev_dbg(dev, "no available pmem capacity\n"); - rc = -ENXIO; - goto out; + return -ENXIO; } if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) { dev_dbg(dev, "no available ram capacity\n"); - rc = -ENXIO; - goto out; + return -ENXIO; } cxled->mode = mode; - rc = 0; -out: - up_write(&cxl_dpa_rwsem); - - return rc; + 
return 0; } int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) From a90326c76bd684bdf0a4f2842ff987ad5c77ff11 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:24 -0600 Subject: [PATCH 14/14] cxl/region: Refactor common create region code create_pmem_region_store() and create_ram_region_store() are identical with the exception of the region mode. With the addition of DC region mode this would end up being 3 copies of the same code. Refactor create_pmem_region_store() and create_ram_region_store() to use a single common function to be used in subsequent DC code. Suggested-by: Fan Ni Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Li Ming Reviewed-by: Alison Schofield Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-6-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e701e4b040328..02437e716b7e0 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2536,9 +2536,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); } -static ssize_t create_pmem_region_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t create_region_store(struct device *dev, const char *buf, + size_t len, enum cxl_decoder_mode mode) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; @@ -2548,31 +2547,26 @@ static ssize_t create_pmem_region_store(struct device *dev, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id); + cxlr = __create_region(cxlrd, mode, id); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); return len; } + +static ssize_t create_pmem_region_store(struct 
device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return create_region_store(dev, buf, len, CXL_DECODER_PMEM); +} DEVICE_ATTR_RW(create_pmem_region); static ssize_t create_ram_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); - struct cxl_region *cxlr; - int rc, id; - - rc = sscanf(buf, "region%d\n", &id); - if (rc != 1) - return -EINVAL; - - cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id); - if (IS_ERR(cxlr)) - return PTR_ERR(cxlr); - - return len; + return create_region_store(dev, buf, len, CXL_DECODER_RAM); } DEVICE_ATTR_RW(create_ram_region);