From 4497ee914f10264894b08066cbee026604cd244f Mon Sep 17 00:00:00 2001 From: Yan Zhen Date: Mon, 23 Sep 2024 15:50:16 +0800 Subject: [PATCH 001/366] watchdog: fix typo in the comment Correctly spelled comments make it easier for the reader to understand the code. Fix typos: 'hearbeat' -> 'heartbeat', 'retrigggers' -> 'retriggers', 'funtions' -> 'functions', 'Resgister' -> 'Register'. Signed-off-by: Yan Zhen Reviewed-by: Wim Van Sebroeck Link: https://lore.kernel.org/r/20240923075016.2439774-1-yanzhen@vivo.com Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/pcwd.c | 2 +- drivers/watchdog/rzn1_wdt.c | 2 +- drivers/watchdog/smsc37b787_wdt.c | 2 +- drivers/watchdog/starfive-wdt.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 1a4282235aac5..31d3dcbf815e9 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -833,7 +833,7 @@ static int pcwd_isa_match(struct device *dev, unsigned int id) port0 = inb_p(base_addr); port1 = inb_p(base_addr + 1); - /* Has either hearbeat bit changed? */ + /* Has either heartbeat bit changed? */ if ((port0 ^ last_port0) & WD_HRTBT || (port1 ^ last_port1) & WD_REVC_HRBT) { retval = 1; diff --git a/drivers/watchdog/rzn1_wdt.c b/drivers/watchdog/rzn1_wdt.c index 7d3192d34afd5..96fd04fbc2a26 100644 --- a/drivers/watchdog/rzn1_wdt.c +++ b/drivers/watchdog/rzn1_wdt.c @@ -52,7 +52,7 @@ static int rzn1_wdt_ping(struct watchdog_device *w) { struct rzn1_watchdog *wdt = watchdog_get_drvdata(w); - /* Any value retrigggers the watchdog */ + /* Any value retriggers the watchdog */ writel(0, wdt->base + RZN1_WDT_RETRIGGER); return 0; diff --git a/drivers/watchdog/smsc37b787_wdt.c b/drivers/watchdog/smsc37b787_wdt.c index 97ca500ec8a8a..3011e1af00f98 100644 --- a/drivers/watchdog/smsc37b787_wdt.c +++ b/drivers/watchdog/smsc37b787_wdt.c @@ -485,7 +485,7 @@ static long wb_smsc_wdt_ioctl(struct file *file, } } -/* -- Notifier funtions -----------------------------------------*/ +/* -- Notifier functions -----------------------------------------*/ static int wb_smsc_wdt_notify_sys(struct notifier_block *this, unsigned long code, void *unused) diff --git a/drivers/watchdog/starfive-wdt.c b/drivers/watchdog/starfive-wdt.c index 19a2620d3d389..a8b6cf767117f 100644 --- a/drivers/watchdog/starfive-wdt.c +++ b/drivers/watchdog/starfive-wdt.c @@ -80,7 +80,7 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); struct starfive_wdt_variant { - unsigned int control; /* Watchdog Control Resgister for reset enable */ + unsigned int control; /* Watchdog Control Register for reset enable */ unsigned int load; /* Watchdog Load register */ unsigned int reload; /* Watchdog Reload Control register */ unsigned int enable; /* Watchdog Enable Register */ From daa814d784ac034c62ab3fb0ef83daeafef527e2 Mon Sep 17 00:00:00 2001 From: Oleksandr Ocheretnyi Date: Fri, 13 Sep 2024 12:14:03 -0700 Subject: [PATCH 002/366] iTCO_wdt: mask NMI_NOW bit for update_no_reboot_bit() call Commit da23b6faa8bf ("watchdog: iTCO: Add support for Cannon Lake PCH iTCO") does not mask NMI_NOW bit during TCO1_CNT register's value comparison for update_no_reboot_bit() call causing following failure: ... iTCO_vendor_support: vendor-support=0 iTCO_wdt iTCO_wdt: unable to reset NO_REBOOT flag, device disabled by hardware/BIOS ... and this can lead to unexpected NMIs later during regular crashkernel's workflow because of watchdog probe call failures. This change masks NMI_NOW bit for TCO1_CNT register values to avoid unexpected NMI_NOW bit inversions. Fixes: da23b6faa8bf ("watchdog: iTCO: Add support for Cannon Lake PCH iTCO") Signed-off-by: Oleksandr Ocheretnyi Reviewed-by: Guenter Roeck Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20240913191403.2560805-1-oocheret@cisco.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 35b358bcf94ce..f01ed38aba675 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -82,6 +82,13 @@ #define TCO2_CNT(p) (TCOBASE(p) + 0x0a) /* TCO2 Control Register */ #define TCOv2_TMR(p) (TCOBASE(p) + 0x12) /* TCOv2 Timer Initial Value*/ +/* + * NMI_NOW is bit 8 of TCO1_CNT register + * Read/Write + * This bit is implemented as RW but has no effect on HW. + */ +#define NMI_NOW BIT(8) + /* internal variables */ struct iTCO_wdt_private { struct watchdog_device wddev; @@ -219,13 +226,23 @@ static int update_no_reboot_bit_cnt(void *priv, bool set) struct iTCO_wdt_private *p = priv; u16 val, newval; - val = inw(TCO1_CNT(p)); + /* + * writing back 1b1 to NMI_NOW of TCO1_CNT register + * causes NMI_NOW bit inversion what consequently does + * not allow to perform the register's value comparison + * properly. + * + * NMI_NOW bit masking for TCO1_CNT register values + * helps to avoid possible NMI_NOW bit inversions on + * following write operation. + */ + val = inw(TCO1_CNT(p)) & ~NMI_NOW; if (set) val |= BIT(0); else val &= ~BIT(0); outw(val, TCO1_CNT(p)); - newval = inw(TCO1_CNT(p)); + newval = inw(TCO1_CNT(p)) & ~NMI_NOW; /* make sure the update is successful */ return val != newval ? -EIO : 0; From 006778844c2c132c28cfa90e3570560351e01b9a Mon Sep 17 00:00:00 2001 From: Harini T Date: Fri, 13 Sep 2024 17:02:30 +0530 Subject: [PATCH 003/366] watchdog: xilinx_wwdt: Calculate max_hw_heartbeat_ms using clock frequency In the current implementation, the value of max_hw_heartbeat_ms is set to the timeout period expressed in milliseconds and fails to verify if the close window percentage exceeds the maximum value that the hardware supports. 1. Calculate max_hw_heartbeat_ms based on input clock frequency. 2. Update frequency check to require a minimum frequency of 1Mhz. 3. Limit the close and open window percent to hardware supported value to avoid truncation. 4. If the user input timeout exceeds the maximum timeout supported, use only open window and the framework supports the higher timeouts. Fixes: 12984cea1b8c ("watchdog: xilinx_wwdt: Add Versal window watchdog support") Signed-off-by: Harini T Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240913113230.1939373-1-harini.t@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/xilinx_wwdt.c | 75 ++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/drivers/watchdog/xilinx_wwdt.c b/drivers/watchdog/xilinx_wwdt.c index d271e2e8d6e27..3d2a156f71800 100644 --- a/drivers/watchdog/xilinx_wwdt.c +++ b/drivers/watchdog/xilinx_wwdt.c @@ -2,7 +2,7 @@ /* * Window watchdog device driver for Xilinx Versal WWDT * - * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. */ #include @@ -36,6 +36,12 @@ #define XWWDT_CLOSE_WINDOW_PERCENT 50 +/* Maximum count value of each 32 bit window */ +#define XWWDT_MAX_COUNT_WINDOW GENMASK(31, 0) + +/* Maximum count value of closed and open window combined */ +#define XWWDT_MAX_COUNT_WINDOW_COMBINED GENMASK_ULL(32, 1) + static int wwdt_timeout; static int closed_window_percent; @@ -54,6 +60,8 @@ MODULE_PARM_DESC(closed_window_percent, * @xilinx_wwdt_wdd: watchdog device structure * @freq: source clock frequency of WWDT * @close_percent: Closed window percent + * @closed_timeout: Closed window timeout in ticks + * @open_timeout: Open window timeout in ticks */ struct xwwdt_device { void __iomem *base; @@ -61,27 +69,22 @@ struct xwwdt_device { struct watchdog_device xilinx_wwdt_wdd; unsigned long freq; u32 close_percent; + u64 closed_timeout; + u64 open_timeout; }; static int xilinx_wwdt_start(struct watchdog_device *wdd) { struct xwwdt_device *xdev = watchdog_get_drvdata(wdd); struct watchdog_device *xilinx_wwdt_wdd = &xdev->xilinx_wwdt_wdd; - u64 time_out, closed_timeout, open_timeout; u32 control_status_reg; - /* Calculate timeout count */ - time_out = xdev->freq * wdd->timeout; - closed_timeout = div_u64(time_out * xdev->close_percent, 100); - open_timeout = time_out - closed_timeout; - wdd->min_hw_heartbeat_ms = xdev->close_percent * 10 * wdd->timeout; - spin_lock(&xdev->spinlock); iowrite32(XWWDT_MWR_MASK, xdev->base + XWWDT_MWR_OFFSET); iowrite32(~(u32)XWWDT_ESR_WEN_MASK, xdev->base + XWWDT_ESR_OFFSET); - iowrite32((u32)closed_timeout, xdev->base + XWWDT_FWR_OFFSET); - iowrite32((u32)open_timeout, xdev->base + XWWDT_SWR_OFFSET); + iowrite32((u32)xdev->closed_timeout, xdev->base + XWWDT_FWR_OFFSET); + iowrite32((u32)xdev->open_timeout, xdev->base + XWWDT_SWR_OFFSET); /* Enable the window watchdog timer */ control_status_reg = ioread32(xdev->base + XWWDT_ESR_OFFSET); @@ -133,7 +136,12 @@ static int xwwdt_probe(struct platform_device *pdev) struct watchdog_device *xilinx_wwdt_wdd; struct device *dev = &pdev->dev; struct xwwdt_device *xdev; + u64 max_per_window_ms; + u64 min_per_window_ms; + u64 timeout_count; struct clk *clk; + u32 timeout_ms; + u64 ms_count; int ret; xdev = devm_kzalloc(dev, sizeof(*xdev), GFP_KERNEL); @@ -154,12 +162,13 @@ static int xwwdt_probe(struct platform_device *pdev) return PTR_ERR(clk); xdev->freq = clk_get_rate(clk); - if (!xdev->freq) + if (xdev->freq < 1000000) return -EINVAL; xilinx_wwdt_wdd->min_timeout = XWWDT_MIN_TIMEOUT; xilinx_wwdt_wdd->timeout = XWWDT_DEFAULT_TIMEOUT; - xilinx_wwdt_wdd->max_hw_heartbeat_ms = 1000 * xilinx_wwdt_wdd->timeout; + xilinx_wwdt_wdd->max_hw_heartbeat_ms = + div64_u64(XWWDT_MAX_COUNT_WINDOW_COMBINED, xdev->freq) * 1000; if (closed_window_percent == 0 || closed_window_percent >= 100) xdev->close_percent = XWWDT_CLOSE_WINDOW_PERCENT; @@ -167,6 +176,48 @@ static int xwwdt_probe(struct platform_device *pdev) xdev->close_percent = closed_window_percent; watchdog_init_timeout(xilinx_wwdt_wdd, wwdt_timeout, &pdev->dev); + + /* Calculate ticks for 1 milli-second */ + ms_count = div_u64(xdev->freq, 1000); + timeout_ms = xilinx_wwdt_wdd->timeout * 1000; + timeout_count = timeout_ms * ms_count; + + if (timeout_ms > xilinx_wwdt_wdd->max_hw_heartbeat_ms) { + /* + * To avoid ping restrictions until the minimum hardware heartbeat, + * we will solely rely on the open window and + * adjust the minimum hardware heartbeat to 0. + */ + xdev->closed_timeout = 0; + xdev->open_timeout = XWWDT_MAX_COUNT_WINDOW; + xilinx_wwdt_wdd->min_hw_heartbeat_ms = 0; + xilinx_wwdt_wdd->max_hw_heartbeat_ms = xilinx_wwdt_wdd->max_hw_heartbeat_ms / 2; + } else { + xdev->closed_timeout = div64_u64(timeout_count * xdev->close_percent, 100); + xilinx_wwdt_wdd->min_hw_heartbeat_ms = + div64_u64(timeout_ms * xdev->close_percent, 100); + + if (timeout_ms > xilinx_wwdt_wdd->max_hw_heartbeat_ms / 2) { + max_per_window_ms = xilinx_wwdt_wdd->max_hw_heartbeat_ms / 2; + min_per_window_ms = timeout_ms - max_per_window_ms; + + if (xilinx_wwdt_wdd->min_hw_heartbeat_ms > max_per_window_ms) { + dev_info(xilinx_wwdt_wdd->parent, + "Closed window cannot be set to %d%%. Using maximum supported value.\n", + xdev->close_percent); + xdev->closed_timeout = max_per_window_ms * ms_count; + xilinx_wwdt_wdd->min_hw_heartbeat_ms = max_per_window_ms; + } else if (xilinx_wwdt_wdd->min_hw_heartbeat_ms < min_per_window_ms) { + dev_info(xilinx_wwdt_wdd->parent, + "Closed window cannot be set to %d%%. Using minimum supported value.\n", + xdev->close_percent); + xdev->closed_timeout = min_per_window_ms * ms_count; + xilinx_wwdt_wdd->min_hw_heartbeat_ms = min_per_window_ms; + } + } + xdev->open_timeout = timeout_count - xdev->closed_timeout; + } + spin_lock_init(&xdev->spinlock); watchdog_set_drvdata(xilinx_wwdt_wdd, xdev); watchdog_set_nowayout(xilinx_wwdt_wdd, 1); From b2dd16c277bce344b168ed5286f2378868a58d42 Mon Sep 17 00:00:00 2001 From: lijuang Date: Fri, 20 Sep 2024 15:11:41 +0800 Subject: [PATCH 004/366] dt-bindings: watchdog: Document Qualcomm QCS615 watchdog Add devicetree binding for watchdog present on Qualcomm QCS615 SoC. Signed-off-by: Lijuan Gao Acked-by: Rob Herring (Arm) Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240920-add_watchdog_compatible_for_qcs615-v2-1-427944f1151e@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 932393f8c649a..32eaf43aadb3a 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -26,6 +26,7 @@ properties: - qcom,apss-wdt-msm8994 - qcom,apss-wdt-qcm2290 - qcom,apss-wdt-qcs404 + - qcom,apss-wdt-qcs615 - qcom,apss-wdt-sa8255p - qcom,apss-wdt-sa8775p - qcom,apss-wdt-sc7180 From 8af9ff6b11122bf4b91378148ded7177bfbae582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Sep 2024 17:34:36 +0200 Subject: [PATCH 005/366] watchdog: ziirave_wdt: Drop explicit initialization of struct i2c_device_id::driver_data to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These drivers don't use the driver_data member of struct i2c_device_id, so don't explicitly initialize this member. This prepares putting driver_data in an anonymous union which requires either no initialization or named designators. But it's also a nice cleanup on its own. Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240920153430.503212-18-u.kleine-koenig@baylibre.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 775838346bb50..fcc1ba02e75b6 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -715,7 +715,7 @@ static void ziirave_wdt_remove(struct i2c_client *client) } static const struct i2c_device_id ziirave_wdt_id[] = { - { "rave-wdt", 0 }, + { "rave-wdt" }, { } }; MODULE_DEVICE_TABLE(i2c, ziirave_wdt_id); From 68adabf48f79a46bf2bb615264a54d9bf88528e6 Mon Sep 17 00:00:00 2001 From: Xingyu Wu Date: Fri, 27 Sep 2024 14:50:32 +0800 Subject: [PATCH 006/366] MAINTAINERS: Update the maintainer of StarFive watchdog driver Samin quits maintaining the StarFive watchdog driver and Ziv joins instead. Update the maintainer of this driver from Samin to Ziv. Signed-off-by: Xingyu Wu Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240927065032.2773997-1-xingyu.wu@starfivetech.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bdae0faf000c7..04a831ad728d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22059,7 +22059,7 @@ F: drivers/char/hw_random/jh7110-trng.c STARFIVE WATCHDOG DRIVER M: Xingyu Wu -M: Samin Guo +M: Ziv Xu S: Supported F: Documentation/devicetree/bindings/watchdog/starfive* F: drivers/watchdog/starfive-wdt.c From 3ab1663af6c1ac7d4bd1fb1371a4972bac2922a4 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 15 Apr 2024 15:48:47 +0200 Subject: [PATCH 007/366] watchdog: stm32_iwdg: Add pretimeout support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The STM32MP15xx IWDG adds registers which permit this IP to generate pretimeout interrupt. This interrupt can also be used to wake the CPU from suspend. Implement support for generating this interrupt and let userspace configure the pretimeout. In case the pretimeout is not configured by user, set pretimeout to 3/4 of the WDT timeout cycle. Signed-off-by: Marek Vasut Reviewed-by: Clément Le Goffic Tested-by: Clément Le Goffic Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240415134903.8084-1-marex@denx.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/stm32_iwdg.c | 95 ++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 5404e03876202..d700e0d49bb95 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #define DEFAULT_TIMEOUT 10 @@ -28,6 +29,7 @@ #define IWDG_RLR 0x08 /* ReLoad Register */ #define IWDG_SR 0x0C /* Status Register */ #define IWDG_WINR 0x10 /* Windows Register */ +#define IWDG_EWCR 0x14 /* Early Wake-up Register */ /* IWDG_KR register bit mask */ #define KR_KEY_RELOAD 0xAAAA /* reload counter enable */ @@ -47,22 +49,29 @@ #define SR_PVU BIT(0) /* Watchdog prescaler value update */ #define SR_RVU BIT(1) /* Watchdog counter reload value update */ +#define EWCR_EWIT GENMASK(11, 0) /* Watchdog counter window value */ +#define EWCR_EWIC BIT(14) /* Watchdog early interrupt acknowledge */ +#define EWCR_EWIE BIT(15) /* Watchdog early interrupt enable */ + /* set timeout to 100000 us */ #define TIMEOUT_US 100000 #define SLEEP_US 1000 struct stm32_iwdg_data { bool has_pclk; + bool has_early_wakeup; u32 max_prescaler; }; static const struct stm32_iwdg_data stm32_iwdg_data = { .has_pclk = false, + .has_early_wakeup = false, .max_prescaler = 256, }; static const struct stm32_iwdg_data stm32mp1_iwdg_data = { .has_pclk = true, + .has_early_wakeup = true, .max_prescaler = 1024, }; @@ -88,13 +97,18 @@ static inline void reg_write(void __iomem *base, u32 reg, u32 val) static int stm32_iwdg_start(struct watchdog_device *wdd) { struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd); - u32 tout, presc, iwdg_rlr, iwdg_pr, iwdg_sr; + u32 tout, ptot, presc, iwdg_rlr, iwdg_ewcr, iwdg_pr, iwdg_sr; int ret; dev_dbg(wdd->parent, "%s\n", __func__); + if (!wdd->pretimeout) + wdd->pretimeout = 3 * wdd->timeout / 4; + tout = clamp_t(unsigned int, wdd->timeout, wdd->min_timeout, wdd->max_hw_heartbeat_ms / 1000); + ptot = clamp_t(unsigned int, tout - wdd->pretimeout, + wdd->min_timeout, tout); presc = DIV_ROUND_UP(tout * wdt->rate, RLR_MAX + 1); @@ -102,6 +116,7 @@ static int stm32_iwdg_start(struct watchdog_device *wdd) presc = roundup_pow_of_two(presc); iwdg_pr = presc <= 1 << PR_SHIFT ? 0 : ilog2(presc) - PR_SHIFT; iwdg_rlr = ((tout * wdt->rate) / presc) - 1; + iwdg_ewcr = ((ptot * wdt->rate) / presc) - 1; /* enable write access */ reg_write(wdt->regs, IWDG_KR, KR_KEY_EWA); @@ -109,6 +124,8 @@ static int stm32_iwdg_start(struct watchdog_device *wdd) /* set prescaler & reload registers */ reg_write(wdt->regs, IWDG_PR, iwdg_pr); reg_write(wdt->regs, IWDG_RLR, iwdg_rlr); + if (wdt->data->has_early_wakeup) + reg_write(wdt->regs, IWDG_EWCR, iwdg_ewcr | EWCR_EWIE); reg_write(wdt->regs, IWDG_KR, KR_KEY_ENABLE); /* wait for the registers to be updated (max 100ms) */ @@ -151,6 +168,34 @@ static int stm32_iwdg_set_timeout(struct watchdog_device *wdd, return 0; } +static int stm32_iwdg_set_pretimeout(struct watchdog_device *wdd, + unsigned int pretimeout) +{ + dev_dbg(wdd->parent, "%s pretimeout: %d sec\n", __func__, pretimeout); + + wdd->pretimeout = pretimeout; + + if (watchdog_active(wdd)) + return stm32_iwdg_start(wdd); + + return 0; +} + +static irqreturn_t stm32_iwdg_isr(int irq, void *wdog_arg) +{ + struct watchdog_device *wdd = wdog_arg; + struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd); + u32 reg; + + reg = reg_read(wdt->regs, IWDG_EWCR); + reg |= EWCR_EWIC; + reg_write(wdt->regs, IWDG_EWCR, reg); + + watchdog_notify_pretimeout(wdd); + + return IRQ_HANDLED; +} + static void stm32_clk_disable_unprepare(void *data) { clk_disable_unprepare(data); @@ -207,11 +252,20 @@ static const struct watchdog_info stm32_iwdg_info = { .identity = "STM32 Independent Watchdog", }; +static const struct watchdog_info stm32_iwdg_preinfo = { + .options = WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING | + WDIOF_PRETIMEOUT, + .identity = "STM32 Independent Watchdog", +}; + static const struct watchdog_ops stm32_iwdg_ops = { .owner = THIS_MODULE, .start = stm32_iwdg_start, .ping = stm32_iwdg_ping, .set_timeout = stm32_iwdg_set_timeout, + .set_pretimeout = stm32_iwdg_set_pretimeout, }; static const struct of_device_id stm32_iwdg_of_match[] = { @@ -221,6 +275,40 @@ static const struct of_device_id stm32_iwdg_of_match[] = { }; MODULE_DEVICE_TABLE(of, stm32_iwdg_of_match); +static int stm32_iwdg_irq_init(struct platform_device *pdev, + struct stm32_iwdg *wdt) +{ + struct device_node *np = pdev->dev.of_node; + struct watchdog_device *wdd = &wdt->wdd; + struct device *dev = &pdev->dev; + int irq, ret; + + if (!wdt->data->has_early_wakeup) + return 0; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return 0; + + if (of_property_read_bool(np, "wakeup-source")) { + ret = device_init_wakeup(dev, true); + if (ret) + return ret; + + ret = dev_pm_set_wake_irq(dev, irq); + if (ret) + return ret; + } + + ret = devm_request_irq(dev, irq, stm32_iwdg_isr, 0, + dev_name(dev), wdd); + if (ret) + return ret; + + wdd->info = &stm32_iwdg_preinfo; + return 0; +} + static int stm32_iwdg_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -255,6 +343,11 @@ static int stm32_iwdg_probe(struct platform_device *pdev) wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * wdt->data->max_prescaler * 1000) / wdt->rate; + /* Initialize IRQ, this might override wdd->info, hence it is here. */ + ret = stm32_iwdg_irq_init(pdev, wdt); + if (ret) + return ret; + watchdog_set_drvdata(wdd, wdt); watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT); watchdog_init_timeout(wdd, 0, dev); From 51dfe714c03c066aabc815a2bb2adcc998dfcb30 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 2 Oct 2024 00:59:51 +0800 Subject: [PATCH 008/366] watchdog: apple: Actually flush writes after requesting watchdog restart Although there is an existing code comment about flushing the writes, writes were not actually being flushed. Actually flush the writes by changing readl_relaxed() to readl(). Fixes: 4ed224aeaf661 ("watchdog: Add Apple SoC watchdog driver") Suggested-by: Arnd Bergmann Signed-off-by: Nick Chan Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241001170018.20139-2-towinchenmi@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/apple_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c index d4f739932f0be..62dabf223d909 100644 --- a/drivers/watchdog/apple_wdt.c +++ b/drivers/watchdog/apple_wdt.c @@ -130,7 +130,7 @@ static int apple_wdt_restart(struct watchdog_device *wdd, unsigned long mode, * can take up to ~20-25ms until the SoC is actually reset. Just wait * 50ms here to be safe. */ - (void)readl_relaxed(wdt->regs + APPLE_WDT_WD1_CUR_TIME); + (void)readl(wdt->regs + APPLE_WDT_WD1_CUR_TIME); mdelay(50); return 0; From e6a08988eb5948a99d1f3b48afab4ffff01ae9a4 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 2 Oct 2024 00:59:52 +0800 Subject: [PATCH 009/366] watchdog: apple: Increase reset delay to 150ms The Apple A8X SoC seems to be slowest at resetting, taking up to around 125ms to reset. Wait 150ms to be safe here. Signed-off-by: Nick Chan Reviewed-by: Sven Peter Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241001170018.20139-3-towinchenmi@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/apple_wdt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c index 62dabf223d909..95d9e37df41cd 100644 --- a/drivers/watchdog/apple_wdt.c +++ b/drivers/watchdog/apple_wdt.c @@ -127,11 +127,11 @@ static int apple_wdt_restart(struct watchdog_device *wdd, unsigned long mode, /* * Flush writes and then wait for the SoC to reset. Even though the * reset is queued almost immediately experiments have shown that it - * can take up to ~20-25ms until the SoC is actually reset. Just wait - * 50ms here to be safe. + * can take up to ~120-125ms until the SoC is actually reset. Just + * wait 150ms here to be safe. */ (void)readl(wdt->regs + APPLE_WDT_WD1_CUR_TIME); - mdelay(50); + mdelay(150); return 0; } From 06ba0b8da1daabaf10dbec4c04e883d7a6c81706 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 3 Oct 2024 14:45:08 -0700 Subject: [PATCH 010/366] watchdog: armada_37xx_wdt: remove struct resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need for it with devm_platform_ioremap_resource. Simplifies probe slightly. Signed-off-by: Rosen Penev Reviewed-by: Marek Behún Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241003214508.121107-1-rosenp@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/armada_37xx_wdt.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/armada_37xx_wdt.c b/drivers/watchdog/armada_37xx_wdt.c index 8133a5d05647c..a17a7911771ac 100644 --- a/drivers/watchdog/armada_37xx_wdt.c +++ b/drivers/watchdog/armada_37xx_wdt.c @@ -248,7 +248,6 @@ static const struct watchdog_ops armada_37xx_wdt_ops = { static int armada_37xx_wdt_probe(struct platform_device *pdev) { struct armada_37xx_watchdog *dev; - struct resource *res; struct regmap *regmap; int ret; @@ -266,12 +265,9 @@ static int armada_37xx_wdt_probe(struct platform_device *pdev) return PTR_ERR(regmap); dev->cpu_misc = regmap; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - dev->reg = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!dev->reg) - return -ENOMEM; + dev->reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dev->reg)) + return PTR_ERR(dev->reg); /* init clock */ dev->clk = devm_clk_get_enabled(&pdev->dev, NULL); From 140fb00c40c1b751eb352c09c608477444da0420 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:04 +0200 Subject: [PATCH 011/366] watchdog: always print when registering watchdog fails So far, only 'watchdog_register_device' prints an error if registering the watchdog driver fails. '__watchdog_register_device' doesn't. Refactor the code so that both print out. Drivers can then rely on that and skip their own error messages. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-2-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_core.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index aff2c3912ead6..d46d8c8c01f2d 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -237,7 +237,7 @@ void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority) } EXPORT_SYMBOL_GPL(watchdog_set_restart_priority); -static int __watchdog_register_device(struct watchdog_device *wdd) +static int ___watchdog_register_device(struct watchdog_device *wdd) { int ret, id = -1; @@ -337,6 +337,22 @@ static int __watchdog_register_device(struct watchdog_device *wdd) return 0; } +static int __watchdog_register_device(struct watchdog_device *wdd) +{ + const char *dev_str; + int ret; + + ret = ___watchdog_register_device(wdd); + if (ret) { + dev_str = wdd->parent ? dev_name(wdd->parent) : + (const char *)wdd->info->identity; + pr_err("%s: failed to register watchdog device (err = %d)\n", + dev_str, ret); + } + + return ret; +} + /** * watchdog_register_device() - register a watchdog device * @wdd: watchdog device @@ -350,7 +366,6 @@ static int __watchdog_register_device(struct watchdog_device *wdd) int watchdog_register_device(struct watchdog_device *wdd) { - const char *dev_str; int ret = 0; mutex_lock(&wtd_deferred_reg_mutex); @@ -360,13 +375,6 @@ int watchdog_register_device(struct watchdog_device *wdd) watchdog_deferred_registration_add(wdd); mutex_unlock(&wtd_deferred_reg_mutex); - if (ret) { - dev_str = wdd->parent ? dev_name(wdd->parent) : - (const char *)wdd->info->identity; - pr_err("%s: failed to register watchdog device (err = %d)\n", - dev_str, ret); - } - return ret; } EXPORT_SYMBOL_GPL(watchdog_register_device); From c9e8ba37163a34929304d9c5f6392bd038d6cfe5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:05 +0200 Subject: [PATCH 012/366] watchdog: da9055_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-3-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9055_wdt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/watchdog/da9055_wdt.c b/drivers/watchdog/da9055_wdt.c index 389a4bdd208ce..9d5a2009466f7 100644 --- a/drivers/watchdog/da9055_wdt.c +++ b/drivers/watchdog/da9055_wdt.c @@ -146,12 +146,7 @@ static int da9055_wdt_probe(struct platform_device *pdev) return ret; } - ret = devm_watchdog_register_device(dev, &driver_data->wdt); - if (ret != 0) - dev_err(da9055->dev, "watchdog_register_device() failed: %d\n", - ret); - - return ret; + return devm_watchdog_register_device(dev, &driver_data->wdt); } static struct platform_driver da9055_wdt_driver = { From 7022274d625935d99c2a5c8ac84ed4754229ce64 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:06 +0200 Subject: [PATCH 013/366] watchdog: gxp-wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-4-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/gxp-wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/gxp-wdt.c b/drivers/watchdog/gxp-wdt.c index 2fd85be882786..f2c2361602662 100644 --- a/drivers/watchdog/gxp-wdt.c +++ b/drivers/watchdog/gxp-wdt.c @@ -151,10 +151,8 @@ static int gxp_wdt_probe(struct platform_device *pdev) watchdog_stop_on_reboot(&drvdata->wdd); err = devm_watchdog_register_device(dev, &drvdata->wdd); - if (err) { - dev_err(dev, "Failed to register watchdog device"); + if (err) return err; - } dev_info(dev, "HPE GXP watchdog timer"); From fb2de4ea05780b19cb4c082b481d65477564ddce Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:07 +0200 Subject: [PATCH 014/366] watchdog: iTCO_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-5-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index f01ed38aba675..7672582fa4076 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -609,10 +609,8 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_stop_on_reboot(&p->wddev); watchdog_stop_on_unregister(&p->wddev); ret = devm_watchdog_register_device(dev, &p->wddev); - if (ret != 0) { - dev_err(dev, "cannot register watchdog device (err=%d)\n", ret); + if (ret != 0) return ret; - } dev_info(dev, "initialized. heartbeat=%d sec (nowayout=%d)\n", heartbeat, nowayout); From ebc75304f0b65246dedc79c6b7a9c98ee64e3a8c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:08 +0200 Subject: [PATCH 015/366] watchdog: it87_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-6-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/it87_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index 3e8c15138edda..676cd134e6778 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -349,10 +349,8 @@ static int __init it87_wdt_init(void) watchdog_stop_on_reboot(&wdt_dev); rc = watchdog_register_device(&wdt_dev); - if (rc) { - pr_err("Cannot register watchdog device (err=%d)\n", rc); + if (rc) return rc; - } pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d)\n", chip_type, chip_rev, timeout, nowayout, testmode); From 8904da69098512968dbcf310668c521b0a360108 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:09 +0200 Subject: [PATCH 016/366] watchdog: octeon-wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-7-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/octeon-wdt-main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/octeon-wdt-main.c b/drivers/watchdog/octeon-wdt-main.c index 52d49e4e35a0c..0615bb8160821 100644 --- a/drivers/watchdog/octeon-wdt-main.c +++ b/drivers/watchdog/octeon-wdt-main.c @@ -559,10 +559,8 @@ static int __init octeon_wdt_init(void) watchdog_set_nowayout(&octeon_wdt, nowayout); ret = watchdog_register_device(&octeon_wdt); - if (ret) { - pr_err("watchdog_register_device() failed: %d\n", ret); + if (ret) return ret; - } if (disable) { pr_notice("disabled\n"); From 844f8dff29e52a054fc82ca102060ee3a16620f0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:10 +0200 Subject: [PATCH 017/366] watchdog: rti_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-8-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 4895a69015a8e..e319fa0787c27 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -336,10 +336,8 @@ static int rti_wdt_probe(struct platform_device *pdev) watchdog_init_timeout(wdd, heartbeat, dev); ret = watchdog_register_device(wdd); - if (ret) { - dev_err(dev, "cannot register watchdog device\n"); + if (ret) goto err_iomap; - } if (last_ping) watchdog_set_last_hw_keepalive(wdd, last_ping); From 74ccee5e6ceff65ea83b0da8e300c62b313e8ebc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:11 +0200 Subject: [PATCH 018/366] watchdog: rza_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Wim Van Sebroeck Link: https://lore.kernel.org/r/20241004200314.5459-9-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rza_wdt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/watchdog/rza_wdt.c b/drivers/watchdog/rza_wdt.c index cb4901b3f7778..9334255a37e93 100644 --- a/drivers/watchdog/rza_wdt.c +++ b/drivers/watchdog/rza_wdt.c @@ -169,7 +169,6 @@ static int rza_wdt_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rza_wdt *priv; unsigned long rate; - int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -218,11 +217,7 @@ static int rza_wdt_probe(struct platform_device *pdev) watchdog_init_timeout(&priv->wdev, 0, dev); watchdog_set_drvdata(&priv->wdev, priv); - ret = devm_watchdog_register_device(dev, &priv->wdev); - if (ret) - dev_err(dev, "Cannot register watchdog device\n"); - - return ret; + return devm_watchdog_register_device(dev, &priv->wdev); } static const struct of_device_id rza_wdt_of_match[] = { From 39885f22e9f44f6c85d126789c7b0ee099904acd Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Oct 2024 22:03:12 +0200 Subject: [PATCH 019/366] watchdog: sl28cpld_wdt: don't print out if registering watchdog fails The core will do this already. Signed-off-by: Wolfram Sang Reviewed-by: Michael Walle Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241004200314.5459-10-wsa+renesas@sang-engineering.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sl28cpld_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/sl28cpld_wdt.c b/drivers/watchdog/sl28cpld_wdt.c index 9ce456f09f736..8630c29818f2e 100644 --- a/drivers/watchdog/sl28cpld_wdt.c +++ b/drivers/watchdog/sl28cpld_wdt.c @@ -198,10 +198,8 @@ static int sl28cpld_wdt_probe(struct platform_device *pdev) } ret = devm_watchdog_register_device(&pdev->dev, wdd); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register watchdog device\n"); + if (ret < 0) return ret; - } dev_info(&pdev->dev, "initial timeout %d sec%s\n", wdd->timeout, nowayout ? ", nowayout" : ""); From bcbd7b2b031d14c2e239039c74897cb2e7d5425a Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 11 Oct 2024 12:43:52 +0200 Subject: [PATCH 020/366] dt-bindings: watchdog: airoha: document watchdog for Airoha EN7581 Document watchdog for Airoha EN7581. This SoC implement a simple watchdog that supports a max timeout of 28 seconds. The watchdog ticks on half the BUS clock and requires the BUS clock to be referenced. Signed-off-by: Christian Marangi Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241011104411.28659-1-ansuelsmth@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/airoha,en7581-wdt.yaml | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml diff --git a/Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml b/Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml new file mode 100644 index 0000000000000..6bbab3cb28e54 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/airoha,en7581-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Airoha EN7581 Watchdog Timer + +maintainers: + - Christian Marangi + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + const: airoha,en7581-wdt + + reg: + maxItems: 1 + + clocks: + description: BUS clock (timer ticks at half the BUS clock) + maxItems: 1 + + clock-names: + const: bus + +required: + - compatible + - reg + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include + + watchdog@1fbf0100 { + compatible = "airoha,en7581-wdt"; + reg = <0x1fbf0100 0x3c>; + + clocks = <&scuclk EN7523_CLK_BUS>; + clock-names = "bus"; + }; From 3cf67f3769b8227ca75ca7102180a2e270ee01aa Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 11 Oct 2024 12:43:53 +0200 Subject: [PATCH 021/366] watchdog: Add support for Airoha EN7851 watchdog Add support for Airoha EN7851 watchdog. This is a very basic watchdog with no pretimeout support, max timeout is 28 seconds and it ticks based on half the SoC BUS clock. Signed-off-by: Christian Marangi Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241011104411.28659-2-ansuelsmth@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 8 ++ drivers/watchdog/Makefile | 1 + drivers/watchdog/airoha_wdt.c | 216 ++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 drivers/watchdog/airoha_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 684b9fe84fff5..6e0944cfb409f 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -408,6 +408,14 @@ config SL28CPLD_WATCHDOG # ARM Architecture +config AIROHA_WATCHDOG + tristate "Airoha EN7581 Watchdog" + depends on ARCH_AIROHA || COMPILE_TEST + select WATCHDOG_CORE + help + Watchdog timer embedded into Airoha SoC. This will reboot your + system when the timeout is reached. + config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" depends on (ARM || ARM64 || COMPILE_TEST) && ARM_AMBA diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index ab6f2b41e38e6..99eccefd6c335 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_USBPCWATCHDOG) += pcwd_usb.o obj-$(CONFIG_ARM_SP805_WATCHDOG) += sp805_wdt.o obj-$(CONFIG_ARM_SBSA_WATCHDOG) += sbsa_gwdt.o obj-$(CONFIG_ARMADA_37XX_WATCHDOG) += armada_37xx_wdt.o +obj-$(CONFIG_AIROHA_WATCHDOG) += airoha_wdt.o obj-$(CONFIG_ASM9260_WATCHDOG) += asm9260_wdt.o obj-$(CONFIG_AT91RM9200_WATCHDOG) += at91rm9200_wdt.o obj-$(CONFIG_AT91SAM9X_WATCHDOG) += at91sam9_wdt.o diff --git a/drivers/watchdog/airoha_wdt.c b/drivers/watchdog/airoha_wdt.c new file mode 100644 index 0000000000000..dc8ca11c14d81 --- /dev/null +++ b/drivers/watchdog/airoha_wdt.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Airoha Watchdog Driver + * + * Copyright (c) 2024, AIROHA All rights reserved. + * + * Mayur Kumar + * Christian Marangi + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Base address of timer and watchdog registers */ +#define TIMER_CTRL 0x0 +#define WDT_ENABLE BIT(25) +#define WDT_TIMER_INTERRUPT BIT(21) +/* Timer3 is used as Watchdog Timer */ +#define WDT_TIMER_ENABLE BIT(5) +#define WDT_TIMER_LOAD_VALUE 0x2c +#define WDT_TIMER_CUR_VALUE 0x30 +#define WDT_TIMER_VAL GENMASK(31, 0) +#define WDT_RELOAD 0x38 +#define WDT_RLD BIT(0) + +/* Airoha watchdog structure description */ +struct airoha_wdt_desc { + struct watchdog_device wdog_dev; + unsigned int wdt_freq; + void __iomem *base; +}; + +#define WDT_HEARTBEAT 24 +static int heartbeat = WDT_HEARTBEAT; +module_param(heartbeat, int, 0); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. (default=" + __MODULE_STRING(WDT_HEARTBEAT) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +static int airoha_wdt_start(struct watchdog_device *wdog_dev) +{ + struct airoha_wdt_desc *airoha_wdt = watchdog_get_drvdata(wdog_dev); + u32 val; + + val = readl(airoha_wdt->base + TIMER_CTRL); + val |= (WDT_TIMER_ENABLE | WDT_ENABLE | WDT_TIMER_INTERRUPT); + writel(val, airoha_wdt->base + TIMER_CTRL); + val = wdog_dev->timeout * airoha_wdt->wdt_freq; + writel(val, airoha_wdt->base + WDT_TIMER_LOAD_VALUE); + + return 0; +} + +static int airoha_wdt_stop(struct watchdog_device *wdog_dev) +{ + struct airoha_wdt_desc *airoha_wdt = watchdog_get_drvdata(wdog_dev); + u32 val; + + val = readl(airoha_wdt->base + TIMER_CTRL); + val &= (~WDT_ENABLE & ~WDT_TIMER_ENABLE); + writel(val, airoha_wdt->base + TIMER_CTRL); + + return 0; +} + +static int airoha_wdt_ping(struct watchdog_device *wdog_dev) +{ + struct airoha_wdt_desc *airoha_wdt = watchdog_get_drvdata(wdog_dev); + u32 val; + + val = readl(airoha_wdt->base + WDT_RELOAD); + val |= WDT_RLD; + writel(val, airoha_wdt->base + WDT_RELOAD); + + return 0; +} + +static int airoha_wdt_set_timeout(struct watchdog_device *wdog_dev, unsigned int timeout) +{ + wdog_dev->timeout = timeout; + + if (watchdog_active(wdog_dev)) { + airoha_wdt_stop(wdog_dev); + return airoha_wdt_start(wdog_dev); + } + + return 0; +} + +static unsigned int airoha_wdt_get_timeleft(struct watchdog_device *wdog_dev) +{ + struct airoha_wdt_desc *airoha_wdt = watchdog_get_drvdata(wdog_dev); + u32 val; + + val = readl(airoha_wdt->base + WDT_TIMER_CUR_VALUE); + return DIV_ROUND_UP(val, airoha_wdt->wdt_freq); +} + +static const struct watchdog_info airoha_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING, + .identity = "Airoha Watchdog", +}; + +static const struct watchdog_ops airoha_wdt_ops = { + .owner = THIS_MODULE, + .start = airoha_wdt_start, + .stop = airoha_wdt_stop, + .ping = airoha_wdt_ping, + .set_timeout = airoha_wdt_set_timeout, + .get_timeleft = airoha_wdt_get_timeleft, +}; + +static int airoha_wdt_probe(struct platform_device *pdev) +{ + struct airoha_wdt_desc *airoha_wdt; + struct watchdog_device *wdog_dev; + struct device *dev = &pdev->dev; + struct clk *bus_clk; + int ret; + + airoha_wdt = devm_kzalloc(dev, sizeof(*airoha_wdt), GFP_KERNEL); + if (!airoha_wdt) + return -ENOMEM; + + airoha_wdt->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(airoha_wdt->base)) + return PTR_ERR(airoha_wdt->base); + + bus_clk = devm_clk_get_enabled(dev, "bus"); + if (IS_ERR(bus_clk)) + return dev_err_probe(dev, PTR_ERR(bus_clk), + "failed to enable bus clock\n"); + + /* Watchdog ticks at half the bus rate */ + airoha_wdt->wdt_freq = clk_get_rate(bus_clk) / 2; + + /* Initialize struct watchdog device */ + wdog_dev = &airoha_wdt->wdog_dev; + wdog_dev->timeout = heartbeat; + wdog_dev->info = &airoha_wdt_info; + wdog_dev->ops = &airoha_wdt_ops; + /* Bus 300MHz, watchdog 150MHz, 28 seconds */ + wdog_dev->max_timeout = FIELD_MAX(WDT_TIMER_VAL) / airoha_wdt->wdt_freq; + wdog_dev->parent = dev; + + watchdog_set_drvdata(wdog_dev, airoha_wdt); + watchdog_set_nowayout(wdog_dev, nowayout); + watchdog_stop_on_unregister(wdog_dev); + + ret = devm_watchdog_register_device(dev, wdog_dev); + if (ret) + return ret; + + platform_set_drvdata(pdev, airoha_wdt); + return 0; +} + +static int airoha_wdt_suspend(struct device *dev) +{ + struct airoha_wdt_desc *airoha_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&airoha_wdt->wdog_dev)) + airoha_wdt_stop(&airoha_wdt->wdog_dev); + + return 0; +} + +static int airoha_wdt_resume(struct device *dev) +{ + struct airoha_wdt_desc *airoha_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&airoha_wdt->wdog_dev)) { + airoha_wdt_start(&airoha_wdt->wdog_dev); + airoha_wdt_ping(&airoha_wdt->wdog_dev); + } + return 0; +} + +static const struct of_device_id airoha_wdt_of_match[] = { + { .compatible = "airoha,en7581-wdt", }, + { }, +}; + +MODULE_DEVICE_TABLE(of, airoha_wdt_of_match); + +static DEFINE_SIMPLE_DEV_PM_OPS(airoha_wdt_pm_ops, airoha_wdt_suspend, airoha_wdt_resume); + +static struct platform_driver airoha_wdt_driver = { + .probe = airoha_wdt_probe, + .driver = { + .name = "airoha-wdt", + .pm = pm_sleep_ptr(&airoha_wdt_pm_ops), + .of_match_table = airoha_wdt_of_match, + }, +}; + +module_platform_driver(airoha_wdt_driver); + +MODULE_AUTHOR("Mayur Kumar "); +MODULE_AUTHOR("Christian Marangi "); +MODULE_DESCRIPTION("Airoha EN7581 Watchdog Driver"); +MODULE_LICENSE("GPL"); From 3a6a399cfbb72a96d61597c519a4076d4ab8669f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 22 Oct 2024 11:47:31 +0200 Subject: [PATCH 022/366] watchdog: Delete the cpu5wdt driver This driver has a number of issues (accesses arbitrary I/O ports without identifying the hardware, doesn't document what hardware it supports, suspiciously inconsistent locking model, doesn't implement WDIOC_SETTIMEOUT, potential integer overflow...) The driver was added in 2003 and there's no evidence that it has any recent user, all changes seem to be tree-wide, subsystem-wide, or the result of static code analysis. So I believe we should simply drop this legacy piece of code. Signed-off-by: Jean Delvare Message-ID: <20241011170710.484a257a@endymion.delvare> Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241022114731.31f69c94@endymion.delvare Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../watchdog/watchdog-parameters.rst | 10 - drivers/watchdog/Kconfig | 8 - drivers/watchdog/Makefile | 1 - drivers/watchdog/cpu5wdt.c | 284 ------------------ 4 files changed, 303 deletions(-) delete mode 100644 drivers/watchdog/cpu5wdt.c diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index 29153eed66890..0a0119edfa82d 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -120,16 +120,6 @@ coh901327_wdt: ------------------------------------------------- -cpu5wdt: - port: - base address of watchdog card, default is 0x91 - verbose: - be verbose, default is 0 (no) - ticks: - count down ticks, default is 10000 - -------------------------------------------------- - cpwd: wd0_timeout: Default watchdog0 timeout in 1/10secs diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 6e0944cfb409f..b0010f5943f5e 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1541,14 +1541,6 @@ config SBC7240_WDT To compile this driver as a module, choose M here: the module will be called sbc7240_wdt. -config CPU5_WDT - tristate "SMA CPU5 Watchdog" - depends on (X86 || COMPILE_TEST) && HAS_IOPORT - help - TBD. - To compile this driver as a module, choose M here: the - module will be called cpu5wdt. - config SMSC_SCH311X_WDT tristate "SMSC SCH311X Watchdog Timer" depends on (X86 || COMPILE_TEST) && HAS_IOPORT diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 99eccefd6c335..9ee08e260d6fb 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -138,7 +138,6 @@ obj-$(CONFIG_RDC321X_WDT) += rdc321x_wdt.o obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o obj-$(CONFIG_SBC8360_WDT) += sbc8360.o obj-$(CONFIG_SBC7240_WDT) += sbc7240_wdt.o -obj-$(CONFIG_CPU5_WDT) += cpu5wdt.o obj-$(CONFIG_SMSC_SCH311X_WDT) += sch311x_wdt.o obj-$(CONFIG_SMSC37B787_WDT) += smsc37b787_wdt.o obj-$(CONFIG_TQMX86_WDT) += tqmx86_wdt.o diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c deleted file mode 100644 index f94b840486121..0000000000000 --- a/drivers/watchdog/cpu5wdt.c +++ /dev/null @@ -1,284 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * sma cpu5 watchdog driver - * - * Copyright (C) 2003 Heiko Ronsdorf - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* adjustable parameters */ - -static int verbose; -static int port = 0x91; -static int ticks = 10000; -static DEFINE_SPINLOCK(cpu5wdt_lock); - -#define PFX "cpu5wdt: " - -#define CPU5WDT_EXTENT 0x0A - -#define CPU5WDT_STATUS_REG 0x00 -#define CPU5WDT_TIME_A_REG 0x02 -#define CPU5WDT_TIME_B_REG 0x03 -#define CPU5WDT_MODE_REG 0x04 -#define CPU5WDT_TRIGGER_REG 0x07 -#define CPU5WDT_ENABLE_REG 0x08 -#define CPU5WDT_RESET_REG 0x09 - -#define CPU5WDT_INTERVAL (HZ/10+1) - -/* some device data */ - -static struct { - struct completion stop; - int running; - struct timer_list timer; - int queue; - int default_ticks; - unsigned long inuse; -} cpu5wdt_device; - -/* generic helper functions */ - -static void cpu5wdt_trigger(struct timer_list *unused) -{ - if (verbose > 2) - pr_debug("trigger at %i ticks\n", ticks); - - if (cpu5wdt_device.running) - ticks--; - - spin_lock(&cpu5wdt_lock); - /* keep watchdog alive */ - outb(1, port + CPU5WDT_TRIGGER_REG); - - /* requeue?? */ - if (cpu5wdt_device.queue && ticks) - mod_timer(&cpu5wdt_device.timer, jiffies + CPU5WDT_INTERVAL); - else { - /* ticks doesn't matter anyway */ - complete(&cpu5wdt_device.stop); - } - spin_unlock(&cpu5wdt_lock); - -} - -static void cpu5wdt_reset(void) -{ - ticks = cpu5wdt_device.default_ticks; - - if (verbose) - pr_debug("reset (%i ticks)\n", (int) ticks); - -} - -static void cpu5wdt_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&cpu5wdt_lock, flags); - if (!cpu5wdt_device.queue) { - cpu5wdt_device.queue = 1; - outb(0, port + CPU5WDT_TIME_A_REG); - outb(0, port + CPU5WDT_TIME_B_REG); - outb(1, port + CPU5WDT_MODE_REG); - outb(0, port + CPU5WDT_RESET_REG); - outb(0, port + CPU5WDT_ENABLE_REG); - mod_timer(&cpu5wdt_device.timer, jiffies + CPU5WDT_INTERVAL); - } - /* if process dies, counter is not decremented */ - cpu5wdt_device.running++; - spin_unlock_irqrestore(&cpu5wdt_lock, flags); -} - -static int cpu5wdt_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&cpu5wdt_lock, flags); - if (cpu5wdt_device.running) - cpu5wdt_device.running = 0; - ticks = cpu5wdt_device.default_ticks; - spin_unlock_irqrestore(&cpu5wdt_lock, flags); - if (verbose) - pr_crit("stop not possible\n"); - return -EIO; -} - -/* filesystem operations */ - -static int cpu5wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &cpu5wdt_device.inuse)) - return -EBUSY; - return stream_open(inode, file); -} - -static int cpu5wdt_release(struct inode *inode, struct file *file) -{ - clear_bit(0, &cpu5wdt_device.inuse); - return 0; -} - -static long cpu5wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - unsigned int value; - static const struct watchdog_info ident = { - .options = WDIOF_CARDRESET, - .identity = "CPU5 WDT", - }; - - switch (cmd) { - case WDIOC_GETSUPPORT: - if (copy_to_user(argp, &ident, sizeof(ident))) - return -EFAULT; - break; - case WDIOC_GETSTATUS: - value = inb(port + CPU5WDT_STATUS_REG); - value = (value >> 2) & 1; - return put_user(value, p); - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - case WDIOC_SETOPTIONS: - if (get_user(value, p)) - return -EFAULT; - if (value & WDIOS_ENABLECARD) - cpu5wdt_start(); - if (value & WDIOS_DISABLECARD) - cpu5wdt_stop(); - break; - case WDIOC_KEEPALIVE: - cpu5wdt_reset(); - break; - default: - return -ENOTTY; - } - return 0; -} - -static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - if (!count) - return -EIO; - cpu5wdt_reset(); - return count; -} - -static const struct file_operations cpu5wdt_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = cpu5wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = cpu5wdt_open, - .write = cpu5wdt_write, - .release = cpu5wdt_release, -}; - -static struct miscdevice cpu5wdt_misc = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &cpu5wdt_fops, -}; - -/* init/exit function */ - -static int cpu5wdt_init(void) -{ - unsigned int val; - int err; - - if (verbose) - pr_debug("port=0x%x, verbose=%i\n", port, verbose); - - init_completion(&cpu5wdt_device.stop); - cpu5wdt_device.queue = 0; - timer_setup(&cpu5wdt_device.timer, cpu5wdt_trigger, 0); - cpu5wdt_device.default_ticks = ticks; - - if (!request_region(port, CPU5WDT_EXTENT, PFX)) { - pr_err("request_region failed\n"); - err = -EBUSY; - goto no_port; - } - - /* watchdog reboot? */ - val = inb(port + CPU5WDT_STATUS_REG); - val = (val >> 2) & 1; - if (!val) - pr_info("sorry, was my fault\n"); - - err = misc_register(&cpu5wdt_misc); - if (err < 0) { - pr_err("misc_register failed\n"); - goto no_misc; - } - - - pr_info("init success\n"); - return 0; - -no_misc: - release_region(port, CPU5WDT_EXTENT); -no_port: - return err; -} - -static int cpu5wdt_init_module(void) -{ - return cpu5wdt_init(); -} - -static void cpu5wdt_exit(void) -{ - if (cpu5wdt_device.queue) { - cpu5wdt_device.queue = 0; - wait_for_completion(&cpu5wdt_device.stop); - timer_shutdown_sync(&cpu5wdt_device.timer); - } - - misc_deregister(&cpu5wdt_misc); - - release_region(port, CPU5WDT_EXTENT); - -} - -static void cpu5wdt_exit_module(void) -{ - cpu5wdt_exit(); -} - -/* module entry points */ - -module_init(cpu5wdt_init_module); -module_exit(cpu5wdt_exit_module); - -MODULE_AUTHOR("Heiko Ronsdorf "); -MODULE_DESCRIPTION("sma cpu5 watchdog driver"); -MODULE_LICENSE("GPL"); - -module_param_hw(port, int, ioport, 0); -MODULE_PARM_DESC(port, "base address of watchdog card, default is 0x91"); - -module_param(verbose, int, 0); -MODULE_PARM_DESC(verbose, "be verbose, default is 0 (no)"); - -module_param(ticks, int, 0); -MODULE_PARM_DESC(ticks, "count down ticks, default is 10000"); From 076354a4d4a73cb792a680a7f40f603c9b145a76 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 18 Oct 2024 10:58:20 -0300 Subject: [PATCH 023/366] watchdog: da9063: Do not use a global variable Using the 'use_sw_pm' variable as global is not recommended as it prevents multi instances of the driver to run. Make it a member of the da9063 structure instead. Signed-off-by: Fabio Estevam Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241018135821.274376-1-festevam@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9063_wdt.c | 9 +++++---- include/linux/mfd/da9063/core.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index 684667469b10c..69f884cf1a7bc 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -27,7 +27,6 @@ * others: timeout = 2048 ms * 2^(TWDSCALE-1). */ static const unsigned int wdt_timeout[] = { 0, 2, 4, 8, 16, 32, 65, 131 }; -static bool use_sw_pm; #define DA9063_TWDSCALE_DISABLE 0 #define DA9063_TWDSCALE_MIN 1 @@ -230,7 +229,7 @@ static int da9063_wdt_probe(struct platform_device *pdev) if (!wdd) return -ENOMEM; - use_sw_pm = device_property_present(dev, "dlg,use-sw-pm"); + da9063->use_sw_pm = device_property_present(dev, "dlg,use-sw-pm"); wdd->info = &da9063_watchdog_info; wdd->ops = &da9063_watchdog_ops; @@ -267,8 +266,9 @@ static int da9063_wdt_probe(struct platform_device *pdev) static int __maybe_unused da9063_wdt_suspend(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); + struct da9063 *da9063 = watchdog_get_drvdata(wdd); - if (!use_sw_pm) + if (!da9063->use_sw_pm) return 0; if (watchdog_active(wdd)) @@ -280,8 +280,9 @@ static int __maybe_unused da9063_wdt_suspend(struct device *dev) static int __maybe_unused da9063_wdt_resume(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); + struct da9063 *da9063 = watchdog_get_drvdata(wdd); - if (!use_sw_pm) + if (!da9063->use_sw_pm) return 0; if (watchdog_active(wdd)) diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 8db52324f4169..eae82f421414e 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -78,6 +78,7 @@ struct da9063 { enum da9063_type type; unsigned char variant_code; unsigned int flags; + bool use_sw_pm; /* Control interface */ struct regmap *regmap; From 90fc2c8e720b149af6b937f702aca273d00c670e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 18 Oct 2024 10:58:21 -0300 Subject: [PATCH 024/366] watchdog: da9063: Remove __maybe_unused notations Use the DEFINE_SIMPLE_DEV_PM_OPS() and pm_sleep_ptr() macros to handle the .suspend/.resume callbacks. These macros allow the suspend and resume functions to be automatically dropped by the compiler when CONFIG_SUSPEND is disabled, without having to use __maybe_unused notation. Signed-off-by: Fabio Estevam Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241018135821.274376-2-festevam@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9063_wdt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index 69f884cf1a7bc..92e1b78ff4810 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -263,7 +263,7 @@ static int da9063_wdt_probe(struct platform_device *pdev) return devm_watchdog_register_device(dev, wdd); } -static int __maybe_unused da9063_wdt_suspend(struct device *dev) +static int da9063_wdt_suspend(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); struct da9063 *da9063 = watchdog_get_drvdata(wdd); @@ -277,7 +277,7 @@ static int __maybe_unused da9063_wdt_suspend(struct device *dev) return 0; } -static int __maybe_unused da9063_wdt_resume(struct device *dev) +static int da9063_wdt_resume(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); struct da9063 *da9063 = watchdog_get_drvdata(wdd); @@ -291,14 +291,14 @@ static int __maybe_unused da9063_wdt_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(da9063_wdt_pm_ops, - da9063_wdt_suspend, da9063_wdt_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(da9063_wdt_pm_ops, da9063_wdt_suspend, + da9063_wdt_resume); static struct platform_driver da9063_wdt_driver = { .probe = da9063_wdt_probe, .driver = { .name = DA9063_DRVNAME_WATCHDOG, - .pm = &da9063_wdt_pm_ops, + .pm = pm_sleep_ptr(&da9063_wdt_pm_ops), }, }; module_platform_driver(da9063_wdt_driver); From 43439076383a7611300334d1357c0f8883f40816 Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Fri, 25 Oct 2024 00:34:40 -0600 Subject: [PATCH 025/366] watchdog: it87_wdt: add PWRGD enable quirk for Qotom QCML04 For the watchdog timer to work properly on the QCML04 board we need to set PWRGD enable in the Environment Controller Configuration Registers Special Configuration Register 1 when it is not already set, this may be the case when the watchdog is not enabled from within the BIOS. Signed-off-by: James Hilliard Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241025063441.3494837-1-james.hilliard1@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/it87_wdt.c | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index 676cd134e6778..a1e23dce88103 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -20,6 +20,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include #include #include #include @@ -40,6 +42,7 @@ #define VAL 0x2f /* Logical device Numbers LDN */ +#define EC 0x04 #define GPIO 0x07 /* Configuration Registers and Functions */ @@ -73,6 +76,12 @@ #define IT8784_ID 0x8784 #define IT8786_ID 0x8786 +/* Environment Controller Configuration Registers LDN=0x04 */ +#define SCR1 0xfa + +/* Environment Controller Bits SCR1 */ +#define WDT_PWRGD 0x20 + /* GPIO Configuration Registers LDN=0x07 */ #define WDTCTRL 0x71 #define WDTCFG 0x72 @@ -240,6 +249,21 @@ static int wdt_set_timeout(struct watchdog_device *wdd, unsigned int t) return ret; } +enum { + IT87_WDT_OUTPUT_THROUGH_PWRGD = BIT(0), +}; + +static const struct dmi_system_id it87_quirks[] = { + { + /* Qotom Q30900P (IT8786) */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_NAME, "QCML04"), + }, + .driver_data = (void *)IT87_WDT_OUTPUT_THROUGH_PWRGD, + }, + {} +}; + static const struct watchdog_info ident = { .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING, .firmware_version = 1, @@ -261,8 +285,10 @@ static struct watchdog_device wdt_dev = { static int __init it87_wdt_init(void) { + const struct dmi_system_id *dmi_id; u8 chip_rev; u8 ctrl; + int quirks = 0; int rc; rc = superio_enter(); @@ -273,6 +299,10 @@ static int __init it87_wdt_init(void) chip_rev = superio_inb(CHIPREV) & 0x0f; superio_exit(); + dmi_id = dmi_first_match(it87_quirks); + if (dmi_id) + quirks = (long)dmi_id->driver_data; + switch (chip_type) { case IT8702_ID: max_units = 255; @@ -333,6 +363,15 @@ static int __init it87_wdt_init(void) superio_outb(0x00, WDTCTRL); } + if (quirks & IT87_WDT_OUTPUT_THROUGH_PWRGD) { + superio_select(EC); + ctrl = superio_inb(SCR1); + if (!(ctrl & WDT_PWRGD)) { + ctrl |= WDT_PWRGD; + superio_outb(ctrl, SCR1); + } + } + superio_exit(); if (timeout < 1 || timeout > max_units * 60) { From 562b0b03193b567cd55334b25e5c8d624cd6a06f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 10 Oct 2024 22:36:22 +0200 Subject: [PATCH 026/366] watchdog: Switch back to struct platform_driver::remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all platform drivers below drivers/watchdog/ to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer. While touching these files, make indention of the struct initializer consistent in several files. Signed-off-by: Uwe Kleine-König Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20241010203622.839625-4-u.kleine-koenig@baylibre.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/acquirewdt.c | 2 +- drivers/watchdog/advantechwdt.c | 2 +- drivers/watchdog/at91rm9200_wdt.c | 2 +- drivers/watchdog/at91sam9_wdt.c | 2 +- drivers/watchdog/ath79_wdt.c | 2 +- drivers/watchdog/bcm2835_wdt.c | 2 +- drivers/watchdog/bcm_kona_wdt.c | 2 +- drivers/watchdog/cpwd.c | 2 +- drivers/watchdog/dw_wdt.c | 2 +- drivers/watchdog/gef_wdt.c | 2 +- drivers/watchdog/geodewdt.c | 2 +- drivers/watchdog/ib700wdt.c | 2 +- drivers/watchdog/ie6xx_wdt.c | 2 +- drivers/watchdog/lpc18xx_wdt.c | 2 +- drivers/watchdog/mtx-1_wdt.c | 2 +- drivers/watchdog/nic7018_wdt.c | 2 +- drivers/watchdog/nv_tco.c | 2 +- drivers/watchdog/omap_wdt.c | 2 +- drivers/watchdog/orion_wdt.c | 2 +- drivers/watchdog/rc32434_wdt.c | 2 +- drivers/watchdog/rdc321x_wdt.c | 2 +- drivers/watchdog/renesas_wdt.c | 2 +- drivers/watchdog/riowd.c | 2 +- drivers/watchdog/rti_wdt.c | 2 +- drivers/watchdog/sa1100_wdt.c | 4 ++-- drivers/watchdog/sch311x_wdt.c | 2 +- drivers/watchdog/shwdt.c | 2 +- drivers/watchdog/st_lpc_wdt.c | 2 +- drivers/watchdog/starfive-wdt.c | 2 +- drivers/watchdog/stmp3xxx_rtc_wdt.c | 2 +- drivers/watchdog/txx9wdt.c | 2 +- 31 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/watchdog/acquirewdt.c b/drivers/watchdog/acquirewdt.c index 08ca18e91124d..052f65c48a703 100644 --- a/drivers/watchdog/acquirewdt.c +++ b/drivers/watchdog/acquirewdt.c @@ -285,7 +285,7 @@ static void acq_shutdown(struct platform_device *dev) } static struct platform_driver acquirewdt_driver = { - .remove_new = acq_remove, + .remove = acq_remove, .shutdown = acq_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c index e41cd3ba4e0e9..42d3f37717810 100644 --- a/drivers/watchdog/advantechwdt.c +++ b/drivers/watchdog/advantechwdt.c @@ -293,7 +293,7 @@ static void advwdt_shutdown(struct platform_device *dev) } static struct platform_driver advwdt_driver = { - .remove_new = advwdt_remove, + .remove = advwdt_remove, .shutdown = advwdt_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 17382512a6096..1795aaf1ec45e 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -295,7 +295,7 @@ MODULE_DEVICE_TABLE(of, at91_wdt_dt_ids); static struct platform_driver at91wdt_driver = { .probe = at91wdt_probe, - .remove_new = at91wdt_remove, + .remove = at91wdt_remove, .shutdown = at91wdt_shutdown, .suspend = pm_ptr(at91wdt_suspend), .resume = pm_ptr(at91wdt_resume), diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index 2c6474cb858b7..7be70b98d0912 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -392,7 +392,7 @@ MODULE_DEVICE_TABLE(of, at91_wdt_dt_ids); static struct platform_driver at91wdt_driver = { .probe = at91wdt_probe, - .remove_new = at91wdt_remove, + .remove = at91wdt_remove, .driver = { .name = "at91_wdt", .of_match_table = of_match_ptr(at91_wdt_dt_ids), diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index d16b2c583fa40..7df703e9852a6 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -305,7 +305,7 @@ MODULE_DEVICE_TABLE(of, ath79_wdt_match); static struct platform_driver ath79_wdt_driver = { .probe = ath79_wdt_probe, - .remove_new = ath79_wdt_remove, + .remove = ath79_wdt_remove, .shutdown = ath79_wdt_shutdown, .driver = { .name = DRIVER_NAME, diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index bb001c5d7f17f..9fcfee63905b9 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -227,7 +227,7 @@ static void bcm2835_wdt_remove(struct platform_device *pdev) static struct platform_driver bcm2835_wdt_driver = { .probe = bcm2835_wdt_probe, - .remove_new = bcm2835_wdt_remove, + .remove = bcm2835_wdt_remove, .driver = { .name = "bcm2835-wdt", }, diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index 49e12d47b073d..66bd0324fd68d 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -328,7 +328,7 @@ static struct platform_driver bcm_kona_wdt_driver = { .of_match_table = bcm_kona_wdt_of_match, }, .probe = bcm_kona_wdt_probe, - .remove_new = bcm_kona_wdt_remove, + .remove = bcm_kona_wdt_remove, }; module_platform_driver(bcm_kona_wdt_driver); diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 8ee81f018dda0..4fb92c9e046af 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -653,7 +653,7 @@ static struct platform_driver cpwd_driver = { .of_match_table = cpwd_match, }, .probe = cpwd_probe, - .remove_new = cpwd_remove, + .remove = cpwd_remove, }; module_platform_driver(cpwd_driver); diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 84dca3695f862..26efca9ae0e7d 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -684,7 +684,7 @@ MODULE_DEVICE_TABLE(of, dw_wdt_of_match); static struct platform_driver dw_wdt_driver = { .probe = dw_wdt_drv_probe, - .remove_new = dw_wdt_drv_remove, + .remove = dw_wdt_drv_remove, .driver = { .name = "dw_wdt", .of_match_table = of_match_ptr(dw_wdt_of_match), diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index d854fcfbfa5bc..bf6f733dfb5f7 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -305,7 +305,7 @@ static struct platform_driver gef_wdt_driver = { .of_match_table = gef_wdt_ids, }, .probe = gef_wdt_probe, - .remove_new = gef_wdt_remove, + .remove = gef_wdt_remove, }; static int __init gef_wdt_init(void) diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 4ed6d139320b9..5b80ade1c6814 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -248,7 +248,7 @@ static void geodewdt_shutdown(struct platform_device *dev) } static struct platform_driver geodewdt_driver = { - .remove_new = geodewdt_remove, + .remove = geodewdt_remove, .shutdown = geodewdt_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c index b041ad90a62c5..5ce6101d236d7 100644 --- a/drivers/watchdog/ib700wdt.c +++ b/drivers/watchdog/ib700wdt.c @@ -331,7 +331,7 @@ static void ibwdt_shutdown(struct platform_device *dev) } static struct platform_driver ibwdt_driver = { - .remove_new = ibwdt_remove, + .remove = ibwdt_remove, .shutdown = ibwdt_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/watchdog/ie6xx_wdt.c b/drivers/watchdog/ie6xx_wdt.c index e5cbb409df252..5a7bb7e84653f 100644 --- a/drivers/watchdog/ie6xx_wdt.c +++ b/drivers/watchdog/ie6xx_wdt.c @@ -280,7 +280,7 @@ static void ie6xx_wdt_remove(struct platform_device *pdev) static struct platform_driver ie6xx_wdt_driver = { .probe = ie6xx_wdt_probe, - .remove_new = ie6xx_wdt_remove, + .remove = ie6xx_wdt_remove, .driver = { .name = DRIVER_NAME, }, diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c index 19535f4a2fd2e..f19580e1b3182 100644 --- a/drivers/watchdog/lpc18xx_wdt.c +++ b/drivers/watchdog/lpc18xx_wdt.c @@ -281,7 +281,7 @@ static struct platform_driver lpc18xx_wdt_driver = { .of_match_table = lpc18xx_wdt_match, }, .probe = lpc18xx_wdt_probe, - .remove_new = lpc18xx_wdt_remove, + .remove = lpc18xx_wdt_remove, }; module_platform_driver(lpc18xx_wdt_driver); diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c index 11f05024a1812..f75426cfa4252 100644 --- a/drivers/watchdog/mtx-1_wdt.c +++ b/drivers/watchdog/mtx-1_wdt.c @@ -233,7 +233,7 @@ static void mtx1_wdt_remove(struct platform_device *pdev) static struct platform_driver mtx1_wdt_driver = { .probe = mtx1_wdt_probe, - .remove_new = mtx1_wdt_remove, + .remove = mtx1_wdt_remove, .driver.name = "mtx1-wdt", }; diff --git a/drivers/watchdog/nic7018_wdt.c b/drivers/watchdog/nic7018_wdt.c index c3f0a4926667e..44982b37ba6f5 100644 --- a/drivers/watchdog/nic7018_wdt.c +++ b/drivers/watchdog/nic7018_wdt.c @@ -236,7 +236,7 @@ MODULE_DEVICE_TABLE(acpi, nic7018_device_ids); static struct platform_driver watchdog_driver = { .probe = nic7018_probe, - .remove_new = nic7018_remove, + .remove = nic7018_remove, .driver = { .name = KBUILD_MODNAME, .acpi_match_table = ACPI_PTR(nic7018_device_ids), diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index f8eb1f65a59e6..f16cee5173d50 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -466,7 +466,7 @@ static void nv_tco_shutdown(struct platform_device *dev) static struct platform_driver nv_tco_driver = { .probe = nv_tco_init, - .remove_new = nv_tco_remove, + .remove = nv_tco_remove, .shutdown = nv_tco_shutdown, .driver = { .name = TCO_MODULE_NAME, diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index b6e0236509bbd..d523428a8d220 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -357,7 +357,7 @@ MODULE_DEVICE_TABLE(of, omap_wdt_of_match); static struct platform_driver omap_wdt_driver = { .probe = omap_wdt_probe, - .remove_new = omap_wdt_remove, + .remove = omap_wdt_remove, .shutdown = omap_wdt_shutdown, .suspend = pm_ptr(omap_wdt_suspend), .resume = pm_ptr(omap_wdt_resume), diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 1fe583e8a95b2..0e145f762f6f2 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -665,7 +665,7 @@ static void orion_wdt_shutdown(struct platform_device *pdev) static struct platform_driver orion_wdt_driver = { .probe = orion_wdt_probe, - .remove_new = orion_wdt_remove, + .remove = orion_wdt_remove, .shutdown = orion_wdt_shutdown, .driver = { .name = "orion_wdt", diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index efadbb9d7ce78..0e5c5c96af58d 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -309,7 +309,7 @@ static void rc32434_wdt_shutdown(struct platform_device *pdev) static struct platform_driver rc32434_wdt_driver = { .probe = rc32434_wdt_probe, - .remove_new = rc32434_wdt_remove, + .remove = rc32434_wdt_remove, .shutdown = rc32434_wdt_shutdown, .driver = { .name = "rc32434_wdt", diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index 80490316a27f8..8955177072fa4 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -268,7 +268,7 @@ static void rdc321x_wdt_remove(struct platform_device *pdev) static struct platform_driver rdc321x_wdt_driver = { .probe = rdc321x_wdt_probe, - .remove_new = rdc321x_wdt_remove, + .remove = rdc321x_wdt_remove, .driver = { .name = "rdc321x-wdt", }, diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c index 12c41d6e5cd6f..c0b2a9c5250dd 100644 --- a/drivers/watchdog/renesas_wdt.c +++ b/drivers/watchdog/renesas_wdt.c @@ -337,7 +337,7 @@ static struct platform_driver rwdt_driver = { .pm = &rwdt_pm_ops, }, .probe = rwdt_probe, - .remove_new = rwdt_remove, + .remove = rwdt_remove, }; module_platform_driver(rwdt_driver); diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index f47d90d01c199..83806ccf06d1a 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -238,7 +238,7 @@ static struct platform_driver riowd_driver = { .of_match_table = riowd_match, }, .probe = riowd_probe, - .remove_new = riowd_remove, + .remove = riowd_remove, }; module_platform_driver(riowd_driver); diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index e319fa0787c27..f410b6e39fb6f 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -378,7 +378,7 @@ static struct platform_driver rti_wdt_driver = { .of_match_table = rti_wdt_of_match, }, .probe = rti_wdt_probe, - .remove_new = rti_wdt_remove, + .remove = rti_wdt_remove, }; module_platform_driver(rti_wdt_driver); diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index 6e91ee3fbfb5f..729a8508b31d2 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -236,8 +236,8 @@ static void sa1100dog_remove(struct platform_device *pdev) static struct platform_driver sa1100dog_driver = { .driver.name = "sa1100_wdt", - .probe = sa1100dog_probe, - .remove_new = sa1100dog_remove, + .probe = sa1100dog_probe, + .remove = sa1100dog_remove, }; module_platform_driver(sa1100dog_driver); diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 76053158d259e..9670a1ea57cbd 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -445,7 +445,7 @@ static void sch311x_wdt_shutdown(struct platform_device *dev) static struct platform_driver sch311x_wdt_driver = { .probe = sch311x_wdt_probe, - .remove_new = sch311x_wdt_remove, + .remove = sch311x_wdt_remove, .shutdown = sch311x_wdt_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index 10f1fba78ec2d..7f0150c394216 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -297,7 +297,7 @@ static struct platform_driver sh_wdt_driver = { }, .probe = sh_wdt_probe, - .remove_new = sh_wdt_remove, + .remove = sh_wdt_remove, .shutdown = sh_wdt_shutdown, }; diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c index 4c5b8d98a4f30..d206452072ae2 100644 --- a/drivers/watchdog/st_lpc_wdt.c +++ b/drivers/watchdog/st_lpc_wdt.c @@ -286,7 +286,7 @@ static struct platform_driver st_wdog_driver = { .of_match_table = st_wdog_match, }, .probe = st_wdog_probe, - .remove_new = st_wdog_remove, + .remove = st_wdog_remove, }; module_platform_driver(st_wdog_driver); diff --git a/drivers/watchdog/starfive-wdt.c b/drivers/watchdog/starfive-wdt.c index a8b6cf767117f..355918d62f63d 100644 --- a/drivers/watchdog/starfive-wdt.c +++ b/drivers/watchdog/starfive-wdt.c @@ -597,7 +597,7 @@ MODULE_DEVICE_TABLE(of, starfive_wdt_match); static struct platform_driver starfive_wdt_driver = { .probe = starfive_wdt_probe, - .remove_new = starfive_wdt_remove, + .remove = starfive_wdt_remove, .shutdown = starfive_wdt_shutdown, .driver = { .name = "starfive-wdt", diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c index 4b2caa9807ac8..060447101f486 100644 --- a/drivers/watchdog/stmp3xxx_rtc_wdt.c +++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c @@ -143,7 +143,7 @@ static struct platform_driver stmp3xxx_wdt_driver = { .pm = &stmp3xxx_wdt_pm_ops, }, .probe = stmp3xxx_wdt_probe, - .remove_new = stmp3xxx_wdt_remove, + .remove = stmp3xxx_wdt_remove, }; module_platform_driver(stmp3xxx_wdt_driver); diff --git a/drivers/watchdog/txx9wdt.c b/drivers/watchdog/txx9wdt.c index 8d5f67acbff2a..305349844b4f1 100644 --- a/drivers/watchdog/txx9wdt.c +++ b/drivers/watchdog/txx9wdt.c @@ -159,7 +159,7 @@ static void txx9wdt_shutdown(struct platform_device *dev) static struct platform_driver txx9wdt_driver = { .probe = txx9wdt_probe, - .remove_new = txx9wdt_remove, + .remove = txx9wdt_remove, .shutdown = txx9wdt_shutdown, .driver = { .name = "txx9wdt", From bad201b2ac4e238c6d4b6966a220240e3861640c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 15 Oct 2024 19:47:32 +0300 Subject: [PATCH 027/366] watchdog: rzg2l_wdt: Power on the watchdog domain in the restart handler On RZ/G3S the watchdog can be part of a software-controlled PM domain. In this case, the watchdog device need to be powered on in struct watchdog_ops::restart API. This can be done though pm_runtime_resume_and_get() API if the watchdog PM domain and watchdog device are marked as IRQ safe. We mark the watchdog PM domain as IRQ safe with GENPD_FLAG_IRQ_SAFE when the watchdog PM domain is registered and the watchdog device though pm_runtime_irq_safe(). Before commit e4cf89596c1f ("watchdog: rzg2l_wdt: Fix 'BUG: Invalid wait context'") pm_runtime_get_sync() was used in watchdog restart handler (which is similar to pm_runtime_resume_and_get() except the later one handles the runtime resume errors). Commit e4cf89596c1f ("watchdog: rzg2l_wdt: Fix 'BUG: Invalid wait context'") dropped the pm_runtime_get_sync() and replaced it with clk_prepare_enable() to avoid invalid wait context due to genpd_lock() in genpd_runtime_resume() being called from atomic context. But clk_prepare_enable() doesn't fit for this either (as reported by Ulf Hansson) as clk_prepare() can also sleep (it just not throw invalid wait context warning as it is not written for this). Because the watchdog device is marked now as IRQ safe (though this patch) the irq_safe_dev_in_sleep_domain() call from genpd_runtime_resume() returns 1 for devices not registering an IRQ safe PM domain for watchdog (as the watchdog device is IRQ safe, PM domain is not and watchdog PM domain is always-on), this being the case for RZ/G3S with old device trees and the rest of the SoCs that use this driver, we can now drop also the clk_prepare_enable() calls in restart handler and rely on pm_runtime_resume_and_get(). Thus, drop clk_prepare_enable() and use pm_runtime_resume_and_get() in watchdog restart handler. Signed-off-by: Claudiu Beznea Reviewed-by: Ulf Hansson Reviewed-by: Geert Uytterhoeven Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20241015164732.4085249-5-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rzg2l_wdt.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c index 2a35f890a2883..11bbe48160ec9 100644 --- a/drivers/watchdog/rzg2l_wdt.c +++ b/drivers/watchdog/rzg2l_wdt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -166,8 +167,22 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev, struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); int ret; - clk_prepare_enable(priv->pclk); - clk_prepare_enable(priv->osc_clk); + /* + * In case of RZ/G3S the watchdog device may be part of an IRQ safe power + * domain that is currently powered off. In this case we need to power + * it on before accessing registers. Along with this the clocks will be + * enabled. We don't undo the pm_runtime_resume_and_get() as the device + * need to be on for the reboot to happen. + * + * For the rest of SoCs not registering a watchdog IRQ safe power + * domain it is safe to call pm_runtime_resume_and_get() as the + * irq_safe_dev_in_sleep_domain() call in genpd_runtime_resume() + * returns non zero value and the genpd_lock() is avoided, thus, there + * will be no invalid wait context reported by lockdep. + */ + ret = pm_runtime_resume_and_get(wdev->parent); + if (ret) + return ret; if (priv->devtype == WDT_RZG2L) { ret = reset_control_deassert(priv->rstc); @@ -275,6 +290,7 @@ static int rzg2l_wdt_probe(struct platform_device *pdev) priv->devtype = (uintptr_t)of_device_get_match_data(dev); + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); priv->wdev.info = &rzg2l_wdt_ident; From ccfb765944bb66813398958983cb8141e2624a6b Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 29 Oct 2024 19:11:31 +0000 Subject: [PATCH 028/366] Revert "watchdog: s3c2410_wdt: use exynos_get_pmu_regmap_by_phandle() for PMU regs" This reverts commit 746f0770f916e6c48e422d6a34e67eae16707f0e. Now that we can register a SoC specific regmap with syscon using of_syscon_register_regmap() api we can switch back to using syscon_regmap_lookup_by_phandle() in the client drivers. Signed-off-by: Peter Griffin Reviewed-by: Sam Protsenko Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241029191131.2329414-1-peter.griffin@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + drivers/watchdog/s3c2410_wdt.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b0010f5943f5e..1f1eae0e175ff 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -557,6 +557,7 @@ config S3C2410_WATCHDOG tristate "S3C6410/S5Pv210/Exynos Watchdog" depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST select WATCHDOG_CORE + select MFD_SYSCON if ARCH_EXYNOS help Watchdog timer block in the Samsung S3C64xx, S5Pv210 and Exynos SoCs. This will reboot the system when the timer expires with diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 686cf544d0ae7..349d30462c8c0 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -24,9 +24,9 @@ #include #include #include +#include #include #include -#include #define S3C2410_WTCON 0x00 #define S3C2410_WTDAT 0x04 @@ -699,11 +699,11 @@ static int s3c2410wdt_probe(struct platform_device *pdev) return ret; if (wdt->drv_data->quirks & QUIRKS_HAVE_PMUREG) { - wdt->pmureg = exynos_get_pmu_regmap_by_phandle(dev->of_node, - "samsung,syscon-phandle"); + wdt->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node, + "samsung,syscon-phandle"); if (IS_ERR(wdt->pmureg)) return dev_err_probe(dev, PTR_ERR(wdt->pmureg), - "PMU regmap lookup failed.\n"); + "syscon regmap lookup failed.\n"); } wdt_irq = platform_get_irq(pdev, 0); From a5ee1ca57c157fb7b289f9377c08848dae4eb821 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Oct 2024 10:26:24 +0000 Subject: [PATCH 029/366] docs: ABI: Fix spelling mistake in pretimeout_avaialable_governors There is a spelling mistake, pretimeout_avaialable_governors should be pretimeout_available_governors. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241030102624.3085369-1-colin.i.king@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/ABI/testing/sysfs-class-watchdog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog index 94fb746159512..70eabccf05578 100644 --- a/Documentation/ABI/testing/sysfs-class-watchdog +++ b/Documentation/ABI/testing/sysfs-class-watchdog @@ -76,7 +76,7 @@ Description: timeout when the pretimeout interrupt is delivered. Pretimeout is an optional feature. -What: /sys/class/watchdog/watchdogn/pretimeout_avaialable_governors +What: /sys/class/watchdog/watchdogn/pretimeout_available_governors Date: February 2017 Contact: Wim Van Sebroeck Description: From 24a2f4d106a57d2ea8dd6ce2dc690396d9077ab2 Mon Sep 17 00:00:00 2001 From: Xin Liu Date: Tue, 29 Oct 2024 11:12:20 +0800 Subject: [PATCH 030/366] dt-bindings: watchdog: Document Qualcomm QCS8300 Add devicetree binding for watchdog present on Qualcomm QCS8300 SoC. Signed-off-by: Xin Liu Acked-by: Krzysztof Kozlowski Acked-by: Wim Van Sebroeck Link: https://lore.kernel.org/r/20241029031222.1653123-2-quic_liuxin@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 32eaf43aadb3a..34896a39fa91f 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -27,6 +27,7 @@ properties: - qcom,apss-wdt-qcm2290 - qcom,apss-wdt-qcs404 - qcom,apss-wdt-qcs615 + - qcom,apss-wdt-qcs8300 - qcom,apss-wdt-sa8255p - qcom,apss-wdt-sa8775p - qcom,apss-wdt-sc7180 From f6fe9b628f0ba530b02315fd23429c495b0388d6 Mon Sep 17 00:00:00 2001 From: Animesh Agarwal Date: Mon, 7 Oct 2024 17:24:33 -0400 Subject: [PATCH 031/366] dt-bindings: watchdog: fsl-imx-wdt: Add missing 'big-endian' property Add missing big-endian property in watchdog/fsl-imx-wdt.yaml schema. Only allow big-endian property for ls1012a and ls1043a. Fix dtbs_check errors. arch/arm64/boot/dts/freescale/fsl-ls1012a-frwy.dtb: watchdog@2ad0000: Unevaluated properties are not allowed ('big-endian' was unexpected) Cc: Daniel Baluta Signed-off-by: Animesh Agarwal Signed-off-by: Frank Li Reviewed-by: Krzysztof Kozlowski Reviewed-by: Wim Van Sebroeck Link: https://lore.kernel.org/r/20241007212434.895521-1-Frank.Li@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/fsl-imx-wdt.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml index 36b836d0620c9..0da953cb71272 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml @@ -48,6 +48,8 @@ properties: clocks: maxItems: 1 + big-endian: true + fsl,ext-reset-output: $ref: /schemas/types.yaml#/definitions/flag description: | @@ -93,6 +95,18 @@ allOf: properties: fsl,suspend-in-wait: false + - if: + not: + properties: + compatible: + contains: + enum: + - fsl,ls1012a-wdt + - fsl,ls1043a-wdt + then: + properties: + big-endian: false + unevaluatedProperties: false examples: From a1495a21e0b8aad92132dfcf9c6fffc1bde9d5b2 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Wed, 6 Nov 2024 10:47:51 +0000 Subject: [PATCH 032/366] watchdog: mediatek: Make sure system reset gets asserted in mtk_wdt_restart() Clear the IRQ enable bit of WDT_MODE before asserting software reset in order to make TOPRGU issue a system reset signal instead of an IRQ. Fixes: a44a45536f7b ("watchdog: Add driver for Mediatek watchdog") Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241106104738.195968-2-y.oudjana@protonmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index c35f85ce8d69c..e2d7a57d6ea2e 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -225,9 +225,15 @@ static int mtk_wdt_restart(struct watchdog_device *wdt_dev, { struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base; + u32 reg; wdt_base = mtk_wdt->wdt_base; + /* Enable reset in order to issue a system reset instead of an IRQ */ + reg = readl(wdt_base + WDT_MODE); + reg &= ~WDT_MODE_IRQ_EN; + writel(reg | WDT_MODE_KEY, wdt_base + WDT_MODE); + while (1) { writel(WDT_SWRST_KEY, wdt_base + WDT_SWRST); mdelay(5); From 15ddf704f56f8c95ff74dfd1157ed8646b322fa1 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Wed, 6 Nov 2024 10:47:55 +0000 Subject: [PATCH 033/366] watchdog: mediatek: Add support for MT6735 TOPRGU/WDT Add support for the Top Reset Generation Unit/Watchdog Timer found on MT6735. Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241106104738.195968-3-y.oudjana@protonmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index e2d7a57d6ea2e..91d110646e16f 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -87,6 +88,10 @@ static const struct mtk_wdt_data mt2712_data = { .toprgu_sw_rst_num = MT2712_TOPRGU_SW_RST_NUM, }; +static const struct mtk_wdt_data mt6735_data = { + .toprgu_sw_rst_num = MT6735_TOPRGU_RST_NUM, +}; + static const struct mtk_wdt_data mt6795_data = { .toprgu_sw_rst_num = MT6795_TOPRGU_SW_RST_NUM, }; @@ -489,6 +494,7 @@ static int mtk_wdt_resume(struct device *dev) static const struct of_device_id mtk_wdt_dt_ids[] = { { .compatible = "mediatek,mt2712-wdt", .data = &mt2712_data }, { .compatible = "mediatek,mt6589-wdt" }, + { .compatible = "mediatek,mt6735-wdt", .data = &mt6735_data }, { .compatible = "mediatek,mt6795-wdt", .data = &mt6795_data }, { .compatible = "mediatek,mt7986-wdt", .data = &mt7986_data }, { .compatible = "mediatek,mt7988-wdt", .data = &mt7988_data }, From c63e0ee729c8426a04675602965d4fcb795038b9 Mon Sep 17 00:00:00 2001 From: Byoungtae Cho Date: Mon, 21 Oct 2024 15:39:01 +0900 Subject: [PATCH 034/366] dt-bindings: watchdog: Document ExynosAutoV920 watchdog bindings Add "samsung-exynosautov920-wdt" compatible to the dt-schema document. ExynosAutoV920 is new SoC for automotive, similar to exynosautov9 but some CPU configurations are quite different. Signed-off-by: Byoungtae Cho Signed-off-by: Taewan Kim Acked-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241021063903.793166-2-trunixs.kim@samsung.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml index 77a5ddd0426eb..d175ae9683366 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml @@ -26,6 +26,7 @@ properties: - samsung,exynos7-wdt # for Exynos7 - samsung,exynos850-wdt # for Exynos850 - samsung,exynosautov9-wdt # for Exynosautov9 + - samsung,exynosautov920-wdt # for Exynosautov920 - items: - enum: - tesla,fsd-wdt @@ -77,6 +78,7 @@ allOf: - samsung,exynos7-wdt - samsung,exynos850-wdt - samsung,exynosautov9-wdt + - samsung,exynosautov920-wdt then: required: - samsung,syscon-phandle @@ -88,6 +90,7 @@ allOf: - google,gs101-wdt - samsung,exynos850-wdt - samsung,exynosautov9-wdt + - samsung,exynosautov920-wdt then: properties: clocks: From a5cb13980e00e9c4fbc382d68eda250ab6a14d7c Mon Sep 17 00:00:00 2001 From: Byoungtae Cho Date: Mon, 21 Oct 2024 15:39:02 +0900 Subject: [PATCH 035/366] watchdog: s3c2410_wdt: add support for exynosautov920 SoC Adds the compatibles and drvdata for the ExynosAuto V920 SoC. This SoC is almost similar to ExynosAutoV9, but some CPU configurations are quite different, so it should be added. Plus it also support DBGACK like as GS101 SoC. Signed-off-by: Byoungtae Cho Signed-off-by: Taewan Kim Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241021063903.793166-3-trunixs.kim@samsung.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 349d30462c8c0..30450e99e5e9d 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -63,6 +63,10 @@ #define EXYNOS850_CLUSTER1_NONCPU_INT_EN 0x1644 #define EXYNOSAUTOV9_CLUSTER1_NONCPU_OUT 0x1520 #define EXYNOSAUTOV9_CLUSTER1_NONCPU_INT_EN 0x1544 +#define EXYNOSAUTOV920_CLUSTER0_NONCPU_OUT 0x1420 +#define EXYNOSAUTOV920_CLUSTER0_NONCPU_INT_EN 0x1444 +#define EXYNOSAUTOV920_CLUSTER1_NONCPU_OUT 0x1720 +#define EXYNOSAUTOV920_CLUSTER1_NONCPU_INT_EN 0x1744 #define EXYNOS850_CLUSTER0_WDTRESET_BIT 24 #define EXYNOS850_CLUSTER1_WDTRESET_BIT 23 @@ -303,6 +307,32 @@ static const struct s3c2410_wdt_variant drv_data_gs101_cl1 = { QUIRK_HAS_DBGACK_BIT, }; +static const struct s3c2410_wdt_variant drv_data_exynosautov920_cl0 = { + .mask_reset_reg = EXYNOSAUTOV920_CLUSTER0_NONCPU_INT_EN, + .mask_bit = 2, + .mask_reset_inv = true, + .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, + .rst_stat_bit = EXYNOSAUTOV9_CLUSTER0_WDTRESET_BIT, + .cnt_en_reg = EXYNOSAUTOV920_CLUSTER0_NONCPU_OUT, + .cnt_en_bit = 7, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN | + QUIRK_HAS_DBGACK_BIT, +}; + +static const struct s3c2410_wdt_variant drv_data_exynosautov920_cl1 = { + .mask_reset_reg = EXYNOSAUTOV920_CLUSTER1_NONCPU_INT_EN, + .mask_bit = 2, + .mask_reset_inv = true, + .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, + .rst_stat_bit = EXYNOSAUTOV9_CLUSTER1_WDTRESET_BIT, + .cnt_en_reg = EXYNOSAUTOV920_CLUSTER1_NONCPU_OUT, + .cnt_en_bit = 7, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN | + QUIRK_HAS_DBGACK_BIT, +}; + static const struct of_device_id s3c2410_wdt_match[] = { { .compatible = "google,gs101-wdt", .data = &drv_data_gs101_cl0 }, @@ -320,6 +350,8 @@ static const struct of_device_id s3c2410_wdt_match[] = { .data = &drv_data_exynos850_cl0 }, { .compatible = "samsung,exynosautov9-wdt", .data = &drv_data_exynosautov9_cl0 }, + { .compatible = "samsung,exynosautov920-wdt", + .data = &drv_data_exynosautov920_cl0 }, {}, }; MODULE_DEVICE_TABLE(of, s3c2410_wdt_match); @@ -643,7 +675,8 @@ s3c2410_get_wdt_drv_data(struct platform_device *pdev, struct s3c2410_wdt *wdt) /* Choose Exynos850/ExynosAutov9 driver data w.r.t. cluster index */ if (variant == &drv_data_exynos850_cl0 || variant == &drv_data_exynosautov9_cl0 || - variant == &drv_data_gs101_cl0) { + variant == &drv_data_gs101_cl0 || + variant == &drv_data_exynosautov920_cl0) { u32 index; int err; @@ -662,6 +695,8 @@ s3c2410_get_wdt_drv_data(struct platform_device *pdev, struct s3c2410_wdt *wdt) variant = &drv_data_exynosautov9_cl1; else if (variant == &drv_data_gs101_cl0) variant = &drv_data_gs101_cl1; + else if (variant == &drv_data_exynosautov920_cl0) + variant = &drv_data_exynosautov920_cl1; break; default: return dev_err_probe(dev, -EINVAL, "wrong cluster index: %u\n", index); From 949291c5314009b4f6e252391edbb40fdd5d5414 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 15 Nov 2024 10:21:49 +0000 Subject: [PATCH 036/366] dma-fence: Fix reference leak on fence merge failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release all fence references if the output dma-fence-array could not be allocated. Signed-off-by: Tvrtko Ursulin Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3") Cc: Christian König Cc: Daniel Vetter Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Friedrich Vock Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: # v6.0+ Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-2-tursulin@igalia.com --- drivers/dma-buf/dma-fence-unwrap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 628af51c81af3..b19d0adf6086e 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -164,6 +164,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, dma_fence_context_alloc(1), 1, false); if (!result) { + for (i = 0; i < count; i++) + dma_fence_put(array[i]); tmp = NULL; goto return_tmp; } From fe52c649438b8489c9456681d93a9b3de3d38263 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 15 Nov 2024 10:21:50 +0000 Subject: [PATCH 037/366] dma-fence: Use kernel's sort for merging fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One alternative to the fix Christian proposed in https://lore.kernel.org/dri-devel/20241024124159.4519-3-christian.koenig@amd.com/ is to replace the rather complex open coded sorting loops with the kernel standard sort followed by a context squashing pass. Proposed advantage of this would be readability but one concern Christian raised was that there could be many fences, that they are typically mostly sorted, and so the kernel's heap sort would be much worse by the proposed algorithm. I had a look running some games and vkcube to see what are the typical number of input fences. Tested scenarios: 1) Hogwarts Legacy under Gamescope 450 calls per second to __dma_fence_unwrap_merge. Percentages per number of fences buckets, before and after checking for signalled status, sorting and flattening: N Before After 0 0.91% 1 69.40% 2-3 28.72% 9.4% (90.6% resolved to one fence) 4-5 0.93% 6-9 0.03% 10+ 2) Cyberpunk 2077 under Gamescope 1050 calls per second, amounting to 0.01% CPU time according to perf top. N Before After 0 1.13% 1 52.30% 2-3 40.34% 55.57% 4-5 1.46% 0.50% 6-9 2.44% 10+ 2.34% 3) vkcube under Plasma 90 calls per second. N Before After 0 1 2-3 100% 0% (Ie. all resolved to a single fence) 4-5 6-9 10+ In the case of vkcube all invocations in the 2-3 bucket were actually just two input fences. From these numbers it looks like the heap sort should not be a disadvantage, given how the dominant case is <= 2 input fences which heap sort solves with just one compare and swap. (And for the case of one input fence we have a fast path in the previous patch.) A complementary possibility is to implement a different sorting algorithm under the same API as the kernel's sort() and so keep the simplicity, potentially moving the new sort under lib/ if it would be found more widely useful. v2: * Hold on to fence references and reduce commentary. (Christian) * Record and use latest signaled timestamp in the 2nd loop too. * Consolidate zero or one fences fast paths. v3: * Reverse the seqno sort order for a simpler squashing pass. (Christian) Signed-off-by: Tvrtko Ursulin Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3617 Cc: Christian König Cc: Daniel Vetter Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Friedrich Vock Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: # v6.0+ Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-3-tursulin@igalia.com --- drivers/dma-buf/dma-fence-unwrap.c | 128 ++++++++++++++--------------- 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index b19d0adf6086e..6345062731f15 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -12,6 +12,7 @@ #include #include #include +#include /* Internal helper to start new array iteration, don't use directly */ static struct dma_fence * @@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) } EXPORT_SYMBOL_GPL(dma_fence_unwrap_next); + +static int fence_cmp(const void *_a, const void *_b) +{ + struct dma_fence *a = *(struct dma_fence **)_a; + struct dma_fence *b = *(struct dma_fence **)_b; + + if (a->context < b->context) + return -1; + else if (a->context > b->context) + return 1; + + if (dma_fence_is_later(b, a)) + return 1; + else if (dma_fence_is_later(a, b)) + return -1; + + return 0; +} + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, @@ -67,8 +87,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence_array *result; struct dma_fence *tmp, **array; ktime_t timestamp; - unsigned int i; - size_t count; + int i, j, count; count = 0; timestamp = ns_to_ktime(0); @@ -96,80 +115,55 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (!array) return NULL; - /* - * This trashes the input fence array and uses it as position for the - * following merge loop. This works because the dma_fence_merge() - * wrapper macro is creating this temporary array on the stack together - * with the iterators. - */ - for (i = 0; i < num_fences; ++i) - fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]); - count = 0; - do { - unsigned int sel; - -restart: - tmp = NULL; - for (i = 0; i < num_fences; ++i) { - struct dma_fence *next; - - while (fences[i] && dma_fence_is_signaled(fences[i])) - fences[i] = dma_fence_unwrap_next(&iter[i]); - - next = fences[i]; - if (!next) - continue; - - /* - * We can't guarantee that inpute fences are ordered by - * context, but it is still quite likely when this - * function is used multiple times. So attempt to order - * the fences by context as we pass over them and merge - * fences with the same context. - */ - if (!tmp || tmp->context > next->context) { - tmp = next; - sel = i; - - } else if (tmp->context < next->context) { - continue; - - } else if (dma_fence_is_later(tmp, next)) { - fences[i] = dma_fence_unwrap_next(&iter[i]); - goto restart; + for (i = 0; i < num_fences; ++i) { + dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { + if (!dma_fence_is_signaled(tmp)) { + array[count++] = dma_fence_get(tmp); } else { - fences[sel] = dma_fence_unwrap_next(&iter[sel]); - goto restart; - } - } + ktime_t t = dma_fence_timestamp(tmp); - if (tmp) { - array[count++] = dma_fence_get(tmp); - fences[sel] = dma_fence_unwrap_next(&iter[sel]); + if (ktime_after(t, timestamp)) + timestamp = t; + } } - } while (tmp); - - if (count == 0) { - tmp = dma_fence_allocate_private_stub(ktime_get()); - goto return_tmp; } - if (count == 1) { - tmp = array[0]; - goto return_tmp; - } + if (count == 0 || count == 1) + goto return_fastpath; + + sort(array, count, sizeof(*array), fence_cmp, NULL); - result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); - if (!result) { - for (i = 0; i < count; i++) + /* + * Only keep the most recent fence for each context. + */ + j = 0; + for (i = 1; i < count; i++) { + if (array[i]->context == array[j]->context) dma_fence_put(array[i]); - tmp = NULL; - goto return_tmp; + else + array[++j] = array[i]; + } + count = ++j; + + if (count > 1) { + result = dma_fence_array_create(count, array, + dma_fence_context_alloc(1), + 1, false); + if (!result) { + for (i = 0; i < count; i++) + dma_fence_put(array[i]); + tmp = NULL; + goto return_tmp; + } + return &result->base; } - return &result->base; + +return_fastpath: + if (count == 0) + tmp = dma_fence_allocate_private_stub(timestamp); + else + tmp = array[0]; return_tmp: kfree(array); From 4962ee045d8f06638714d801ab0fb72f89c16690 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 7 Nov 2024 21:38:28 +0100 Subject: [PATCH 038/366] watchdog: rti: of: honor timeout-sec property Currently "timeout-sec" Device Tree property is being silently ignored: even though watchdog_init_timeout() is being used, the driver always passes "heartbeat" == DEFAULT_HEARTBEAT == 60 as argument. Fix this by setting struct watchdog_device::timeout to DEFAULT_HEARTBEAT and passing real module parameter value to watchdog_init_timeout() (which may now be 0 if not specified). Cc: stable@vger.kernel.org Fixes: 2d63908bdbfb ("watchdog: Add K3 RTI watchdog support") Signed-off-by: Alexander Sverdlin Reviewed-by: Vignesh Raghavendra Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20241107203830.1068456-1-alexander.sverdlin@siemens.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index f410b6e39fb6f..58c9445c0f885 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -61,7 +61,7 @@ #define MAX_HW_ERROR 250 -static int heartbeat = DEFAULT_HEARTBEAT; +static int heartbeat; /* * struct to hold data for each WDT device @@ -252,6 +252,7 @@ static int rti_wdt_probe(struct platform_device *pdev) wdd->min_timeout = 1; wdd->max_hw_heartbeat_ms = (WDT_PRELOAD_MAX << WDT_PRELOAD_SHIFT) / wdt->freq * 1000; + wdd->timeout = DEFAULT_HEARTBEAT; wdd->parent = dev; watchdog_set_drvdata(wdd, wdt); From c064de86d2a3909222d5996c5047f64c7a8f791b Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Wed, 13 Nov 2024 23:51:49 +0100 Subject: [PATCH 039/366] scsi: qla1280: Fix hw revision numbering for ISP1020/1040 Fix the hardware revision numbering for Qlogic ISP1020/1040 boards. HWMASK suggests that the revision number only needs four bits, this is consistent with how NetBSD does things in their ISP driver. Verified on a IPS1040B which is seen as rev 5 not as BIT_4. Signed-off-by: Magnus Lindholm Link: https://lore.kernel.org/r/20241113225636.2276-1-linmag7@gmail.com Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/qla1280.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla1280.h b/drivers/scsi/qla1280.h index d309e2ca14deb..dea2290b37d4d 100644 --- a/drivers/scsi/qla1280.h +++ b/drivers/scsi/qla1280.h @@ -116,12 +116,12 @@ struct device_reg { uint16_t id_h; /* ID high */ uint16_t cfg_0; /* Configuration 0 */ #define ISP_CFG0_HWMSK 0x000f /* Hardware revision mask */ -#define ISP_CFG0_1020 BIT_0 /* ISP1020 */ -#define ISP_CFG0_1020A BIT_1 /* ISP1020A */ -#define ISP_CFG0_1040 BIT_2 /* ISP1040 */ -#define ISP_CFG0_1040A BIT_3 /* ISP1040A */ -#define ISP_CFG0_1040B BIT_4 /* ISP1040B */ -#define ISP_CFG0_1040C BIT_5 /* ISP1040C */ +#define ISP_CFG0_1020 1 /* ISP1020 */ +#define ISP_CFG0_1020A 2 /* ISP1020A */ +#define ISP_CFG0_1040 3 /* ISP1040 */ +#define ISP_CFG0_1040A 4 /* ISP1040A */ +#define ISP_CFG0_1040B 5 /* ISP1040B */ +#define ISP_CFG0_1040C 6 /* ISP1040C */ uint16_t cfg_1; /* Configuration 1 */ #define ISP_CFG1_F128 BIT_6 /* 128-byte FIFO threshold */ #define ISP_CFG1_F64 BIT_4|BIT_5 /* 128-byte FIFO threshold */ From e953835c1d41bbb660d7069dc9e61ea6bd7874f3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 15 Nov 2024 22:12:42 +0100 Subject: [PATCH 040/366] scsi: message: fusion: Constify struct pci_device_id 'struct pci_device_id' is not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 36999 2451 88 39538 9a72 drivers/message/fusion/mptfc.o After: ===== text data bss dec hex filename 37415 2043 88 39546 9a7a drivers/message/fusion/mptfc.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/fe8f17a999b6def2649b2ef52ea5c9ee61e28bd0.1731705152.git.christophe.jaillet@wanadoo.fr Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptfc.c | 2 +- drivers/message/fusion/mptsas.c | 2 +- drivers/message/fusion/mptspi.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index 91242f26defb0..ee61b70aa677e 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -137,7 +137,7 @@ static const struct scsi_host_template mptfc_driver_template = { * Supported hardware */ -static struct pci_device_id mptfc_pci_table[] = { +static const struct pci_device_id mptfc_pci_table[] = { { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC909, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC919, diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index a798e26c6402d..d0549a4daf76a 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -5377,7 +5377,7 @@ static void mptsas_remove(struct pci_dev *pdev) mptscsih_remove(pdev); } -static struct pci_device_id mptsas_pci_table[] = { +static const struct pci_device_id mptsas_pci_table[] = { { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1064, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068, diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 574b882c9a854..4184d0c70ac31 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1238,7 +1238,7 @@ static struct spi_function_template mptspi_transport_functions = { * Supported hardware */ -static struct pci_device_id mptspi_pci_table[] = { +static const struct pci_device_id mptspi_pci_table[] = { { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1030, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_ATTO, MPI_MANUFACTPAGE_DEVID_53C1030, From 52172a352c97d39ee2658f6503b065b9896d48ad Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 17 Nov 2024 13:52:14 +0000 Subject: [PATCH 041/366] scsi: bfa: Remove unused structure builders bfa has a large set of structure builders, of which only about 60% are used; remove the rest. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20241117135215.38771-2-linux@treblig.org Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcbuild.c | 348 --------------------------------- drivers/scsi/bfa/bfa_fcbuild.h | 55 ------ 2 files changed, 403 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcbuild.c b/drivers/scsi/bfa/bfa_fcbuild.c index 52303e8c716dd..0f5d8a9cdf143 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.c +++ b/drivers/scsi/bfa/bfa_fcbuild.c @@ -219,44 +219,6 @@ fc_plogi_x_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, return sizeof(struct fc_logi_s); } -u16 -fc_flogi_build(struct fchs_s *fchs, struct fc_logi_s *flogi, u32 s_id, - u16 ox_id, wwn_t port_name, wwn_t node_name, u16 pdu_size, - u8 set_npiv, u8 set_auth, u16 local_bb_credits) -{ - u32 d_id = bfa_hton3b(FC_FABRIC_PORT); - __be32 *vvl_info; - - memcpy(flogi, &plogi_tmpl, sizeof(struct fc_logi_s)); - - flogi->els_cmd.els_code = FC_ELS_FLOGI; - fc_els_req_build(fchs, d_id, s_id, ox_id); - - flogi->csp.rxsz = flogi->class3.rxsz = cpu_to_be16(pdu_size); - flogi->port_name = port_name; - flogi->node_name = node_name; - - /* - * Set the NPIV Capability Bit ( word 1, bit 31) of Common - * Service Parameters. - */ - flogi->csp.ciro = set_npiv; - - /* set AUTH capability */ - flogi->csp.security = set_auth; - - flogi->csp.bbcred = cpu_to_be16(local_bb_credits); - - /* Set brcd token in VVL */ - vvl_info = (u32 *)&flogi->vvl[0]; - - /* set the flag to indicate the presence of VVL */ - flogi->csp.npiv_supp = 1; /* @todo. field name is not correct */ - vvl_info[0] = cpu_to_be32(FLOGI_VVL_BRCD); - - return sizeof(struct fc_logi_s); -} - u16 fc_flogi_acc_build(struct fchs_s *fchs, struct fc_logi_s *flogi, u32 s_id, __be16 ox_id, wwn_t port_name, wwn_t node_name, @@ -279,24 +241,6 @@ fc_flogi_acc_build(struct fchs_s *fchs, struct fc_logi_s *flogi, u32 s_id, return sizeof(struct fc_logi_s); } -u16 -fc_fdisc_build(struct fchs_s *fchs, struct fc_logi_s *flogi, u32 s_id, - u16 ox_id, wwn_t port_name, wwn_t node_name, u16 pdu_size) -{ - u32 d_id = bfa_hton3b(FC_FABRIC_PORT); - - memcpy(flogi, &plogi_tmpl, sizeof(struct fc_logi_s)); - - flogi->els_cmd.els_code = FC_ELS_FDISC; - fc_els_req_build(fchs, d_id, s_id, ox_id); - - flogi->csp.rxsz = flogi->class3.rxsz = cpu_to_be16(pdu_size); - flogi->port_name = port_name; - flogi->node_name = node_name; - - return sizeof(struct fc_logi_s); -} - u16 fc_plogi_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, u16 ox_id, wwn_t port_name, wwn_t node_name, @@ -545,18 +489,6 @@ fc_pdisc_parse(struct fchs_s *fchs, wwn_t node_name, wwn_t port_name) return FC_PARSE_OK; } -u16 -fc_abts_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id) -{ - memcpy(fchs, &fc_bls_req_tmpl, sizeof(struct fchs_s)); - fchs->cat_info = FC_CAT_ABTS; - fchs->d_id = (d_id); - fchs->s_id = (s_id); - fchs->ox_id = cpu_to_be16(ox_id); - - return sizeof(struct fchs_s); -} - enum fc_parse_status fc_abts_rsp_parse(struct fchs_s *fchs, int len) { @@ -567,23 +499,6 @@ fc_abts_rsp_parse(struct fchs_s *fchs, int len) return FC_PARSE_FAILURE; } -u16 -fc_rrq_build(struct fchs_s *fchs, struct fc_rrq_s *rrq, u32 d_id, u32 s_id, - u16 ox_id, u16 rrq_oxid) -{ - fc_els_req_build(fchs, d_id, s_id, ox_id); - - /* - * build rrq payload - */ - memcpy(rrq, &rrq_tmpl, sizeof(struct fc_rrq_s)); - rrq->s_id = (s_id); - rrq->ox_id = cpu_to_be16(rrq_oxid); - rrq->rx_id = FC_RXID_ANY; - - return sizeof(struct fc_rrq_s); -} - u16 fc_logo_acc_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, __be16 ox_id) @@ -658,30 +573,6 @@ fc_logout_params_pages(struct fchs_s *fc_frame, u8 els_code) return num_pages; } -u16 -fc_tprlo_acc_build(struct fchs_s *fchs, struct fc_tprlo_acc_s *tprlo_acc, - u32 d_id, u32 s_id, __be16 ox_id, int num_pages) -{ - int page; - - fc_els_rsp_build(fchs, d_id, s_id, ox_id); - - memset(tprlo_acc, 0, (num_pages * 16) + 4); - tprlo_acc->command = FC_ELS_ACC; - - tprlo_acc->page_len = 0x10; - tprlo_acc->payload_len = cpu_to_be16((num_pages * 16) + 4); - - for (page = 0; page < num_pages; page++) { - tprlo_acc->tprlo_acc_params[page].opa_valid = 0; - tprlo_acc->tprlo_acc_params[page].rpa_valid = 0; - tprlo_acc->tprlo_acc_params[page].fc4type_csp = FC_TYPE_FCP; - tprlo_acc->tprlo_acc_params[page].orig_process_assc = 0; - tprlo_acc->tprlo_acc_params[page].resp_process_assc = 0; - } - return be16_to_cpu(tprlo_acc->payload_len); -} - u16 fc_prlo_acc_build(struct fchs_s *fchs, struct fc_prlo_acc_s *prlo_acc, u32 d_id, u32 s_id, __be16 ox_id, int num_pages) @@ -706,20 +597,6 @@ fc_prlo_acc_build(struct fchs_s *fchs, struct fc_prlo_acc_s *prlo_acc, u32 d_id, return be16_to_cpu(prlo_acc->payload_len); } -u16 -fc_rnid_build(struct fchs_s *fchs, struct fc_rnid_cmd_s *rnid, u32 d_id, - u32 s_id, u16 ox_id, u32 data_format) -{ - fc_els_req_build(fchs, d_id, s_id, ox_id); - - memset(rnid, 0, sizeof(struct fc_rnid_cmd_s)); - - rnid->els_cmd.els_code = FC_ELS_RNID; - rnid->node_id_data_format = data_format; - - return sizeof(struct fc_rnid_cmd_s); -} - u16 fc_rnid_acc_build(struct fchs_s *fchs, struct fc_rnid_acc_s *rnid_acc, u32 d_id, u32 s_id, __be16 ox_id, u32 data_format, @@ -748,18 +625,6 @@ fc_rnid_acc_build(struct fchs_s *fchs, struct fc_rnid_acc_s *rnid_acc, u32 d_id, } -u16 -fc_rpsc_build(struct fchs_s *fchs, struct fc_rpsc_cmd_s *rpsc, u32 d_id, - u32 s_id, u16 ox_id) -{ - fc_els_req_build(fchs, d_id, s_id, ox_id); - - memset(rpsc, 0, sizeof(struct fc_rpsc_cmd_s)); - - rpsc->els_cmd.els_code = FC_ELS_RPSC; - return sizeof(struct fc_rpsc_cmd_s); -} - u16 fc_rpsc2_build(struct fchs_s *fchs, struct fc_rpsc2_cmd_s *rpsc2, u32 d_id, u32 s_id, u32 *pid_list, u16 npids) @@ -801,24 +666,6 @@ fc_rpsc_acc_build(struct fchs_s *fchs, struct fc_rpsc_acc_s *rpsc_acc, return sizeof(struct fc_rpsc_acc_s); } -u16 -fc_pdisc_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id, - wwn_t port_name, wwn_t node_name, u16 pdu_size) -{ - struct fc_logi_s *pdisc = (struct fc_logi_s *) (fchs + 1); - - memcpy(pdisc, &plogi_tmpl, sizeof(struct fc_logi_s)); - - pdisc->els_cmd.els_code = FC_ELS_PDISC; - fc_els_req_build(fchs, d_id, s_id, ox_id); - - pdisc->csp.rxsz = pdisc->class3.rxsz = cpu_to_be16(pdu_size); - pdisc->port_name = port_name; - pdisc->node_name = node_name; - - return sizeof(struct fc_logi_s); -} - u16 fc_pdisc_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name) { @@ -842,74 +689,6 @@ fc_pdisc_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name) return FC_PARSE_OK; } -u16 -fc_prlo_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id, - int num_pages) -{ - struct fc_prlo_s *prlo = (struct fc_prlo_s *) (fchs + 1); - int page; - - fc_els_req_build(fchs, d_id, s_id, ox_id); - memset(prlo, 0, (num_pages * 16) + 4); - prlo->command = FC_ELS_PRLO; - prlo->page_len = 0x10; - prlo->payload_len = cpu_to_be16((num_pages * 16) + 4); - - for (page = 0; page < num_pages; page++) { - prlo->prlo_params[page].type = FC_TYPE_FCP; - prlo->prlo_params[page].opa_valid = 0; - prlo->prlo_params[page].rpa_valid = 0; - prlo->prlo_params[page].orig_process_assc = 0; - prlo->prlo_params[page].resp_process_assc = 0; - } - - return be16_to_cpu(prlo->payload_len); -} - -u16 -fc_tprlo_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id, - int num_pages, enum fc_tprlo_type tprlo_type, u32 tpr_id) -{ - struct fc_tprlo_s *tprlo = (struct fc_tprlo_s *) (fchs + 1); - int page; - - fc_els_req_build(fchs, d_id, s_id, ox_id); - memset(tprlo, 0, (num_pages * 16) + 4); - tprlo->command = FC_ELS_TPRLO; - tprlo->page_len = 0x10; - tprlo->payload_len = cpu_to_be16((num_pages * 16) + 4); - - for (page = 0; page < num_pages; page++) { - tprlo->tprlo_params[page].type = FC_TYPE_FCP; - tprlo->tprlo_params[page].opa_valid = 0; - tprlo->tprlo_params[page].rpa_valid = 0; - tprlo->tprlo_params[page].orig_process_assc = 0; - tprlo->tprlo_params[page].resp_process_assc = 0; - if (tprlo_type == FC_GLOBAL_LOGO) { - tprlo->tprlo_params[page].global_process_logout = 1; - } else if (tprlo_type == FC_TPR_LOGO) { - tprlo->tprlo_params[page].tpo_nport_valid = 1; - tprlo->tprlo_params[page].tpo_nport_id = (tpr_id); - } - } - - return be16_to_cpu(tprlo->payload_len); -} - -u16 -fc_ba_rjt_build(struct fchs_s *fchs, u32 d_id, u32 s_id, __be16 ox_id, - u32 reason_code, u32 reason_expl) -{ - struct fc_ba_rjt_s *ba_rjt = (struct fc_ba_rjt_s *) (fchs + 1); - - fc_bls_rsp_build(fchs, d_id, s_id, ox_id); - - fchs->cat_info = FC_CAT_BA_RJT; - ba_rjt->reason_code = reason_code; - ba_rjt->reason_expl = reason_expl; - return sizeof(struct fc_ba_rjt_s); -} - static void fc_gs_cthdr_build(struct ct_hdr_s *cthdr, u32 s_id, u16 cmd_code) { @@ -973,22 +752,6 @@ fc_gpnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, return sizeof(fcgs_gpnid_req_t) + sizeof(struct ct_hdr_s); } -u16 -fc_gnnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, - u32 port_id) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - fcgs_gnnid_req_t *gnnid = (fcgs_gnnid_req_t *) (cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, ox_id); - fc_gs_cthdr_build(cthdr, s_id, GS_GNN_ID); - - memset(gnnid, 0, sizeof(fcgs_gnnid_req_t)); - gnnid->dap = port_id; - return sizeof(fcgs_gnnid_req_t) + sizeof(struct ct_hdr_s); -} - u16 fc_ct_rsp_parse(struct ct_hdr_s *cthdr) { @@ -1034,26 +797,6 @@ fc_scr_build(struct fchs_s *fchs, struct fc_scr_s *scr, return sizeof(struct fc_scr_s); } -u16 -fc_rscn_build(struct fchs_s *fchs, struct fc_rscn_pl_s *rscn, - u32 s_id, u16 ox_id) -{ - u32 d_id = bfa_hton3b(FC_FABRIC_CONTROLLER); - u16 payldlen; - - fc_els_req_build(fchs, d_id, s_id, ox_id); - rscn->command = FC_ELS_RSCN; - rscn->pagelen = sizeof(rscn->event[0]); - - payldlen = sizeof(u32) + rscn->pagelen; - rscn->payldlen = cpu_to_be16(payldlen); - - rscn->event[0].format = FC_RSCN_FORMAT_PORTID; - rscn->event[0].portid = s_id; - - return struct_size(rscn, event, 1); -} - u16 fc_rftid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, enum bfa_lport_role roles) @@ -1078,26 +821,6 @@ fc_rftid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, return sizeof(struct fcgs_rftid_req_s) + sizeof(struct ct_hdr_s); } -u16 -fc_rftid_build_sol(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, - u8 *fc4_bitmap, u32 bitmap_size) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - struct fcgs_rftid_req_s *rftid = (struct fcgs_rftid_req_s *)(cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, ox_id); - fc_gs_cthdr_build(cthdr, s_id, GS_RFT_ID); - - memset(rftid, 0, sizeof(struct fcgs_rftid_req_s)); - - rftid->dap = s_id; - memcpy((void *)rftid->fc4_type, (void *)fc4_bitmap, - (bitmap_size < 32 ? bitmap_size : 32)); - - return sizeof(struct fcgs_rftid_req_s) + sizeof(struct ct_hdr_s); -} - u16 fc_rffid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, u8 fc4_type, u8 fc4_ftrs) @@ -1181,24 +904,6 @@ fc_gid_ft_build(struct fchs_s *fchs, void *pyld, u32 s_id, u8 fc4_type) return sizeof(struct fcgs_gidft_req_s) + sizeof(struct ct_hdr_s); } -u16 -fc_rpnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, - wwn_t port_name) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - struct fcgs_rpnid_req_s *rpnid = (struct fcgs_rpnid_req_s *)(cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, 0); - fc_gs_cthdr_build(cthdr, s_id, GS_RPN_ID); - - memset(rpnid, 0, sizeof(struct fcgs_rpnid_req_s)); - rpnid->port_id = port_id; - rpnid->port_name = port_name; - - return sizeof(struct fcgs_rpnid_req_s) + sizeof(struct ct_hdr_s); -} - u16 fc_rnnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, wwn_t node_name) @@ -1217,59 +922,6 @@ fc_rnnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, return sizeof(struct fcgs_rnnid_req_s) + sizeof(struct ct_hdr_s); } -u16 -fc_rcsid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, - u32 cos) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - struct fcgs_rcsid_req_s *rcsid = - (struct fcgs_rcsid_req_s *) (cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, 0); - fc_gs_cthdr_build(cthdr, s_id, GS_RCS_ID); - - memset(rcsid, 0, sizeof(struct fcgs_rcsid_req_s)); - rcsid->port_id = port_id; - rcsid->cos = cos; - - return sizeof(struct fcgs_rcsid_req_s) + sizeof(struct ct_hdr_s); -} - -u16 -fc_rptid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, - u8 port_type) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - struct fcgs_rptid_req_s *rptid = (struct fcgs_rptid_req_s *)(cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, 0); - fc_gs_cthdr_build(cthdr, s_id, GS_RPT_ID); - - memset(rptid, 0, sizeof(struct fcgs_rptid_req_s)); - rptid->port_id = port_id; - rptid->port_type = port_type; - - return sizeof(struct fcgs_rptid_req_s) + sizeof(struct ct_hdr_s); -} - -u16 -fc_ganxt_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id) -{ - struct ct_hdr_s *cthdr = (struct ct_hdr_s *) pyld; - struct fcgs_ganxt_req_s *ganxt = (struct fcgs_ganxt_req_s *)(cthdr + 1); - u32 d_id = bfa_hton3b(FC_NAME_SERVER); - - fc_gs_fchdr_build(fchs, d_id, s_id, 0); - fc_gs_cthdr_build(cthdr, s_id, GS_GA_NXT); - - memset(ganxt, 0, sizeof(struct fcgs_ganxt_req_s)); - ganxt->port_id = port_id; - - return sizeof(struct ct_hdr_s) + sizeof(struct fcgs_ganxt_req_s); -} - /* * Builds fc hdr and ct hdr for FDMI requests. */ diff --git a/drivers/scsi/bfa/bfa_fcbuild.h b/drivers/scsi/bfa/bfa_fcbuild.h index 49e0ee4a73347..26646106da4c5 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.h +++ b/drivers/scsi/bfa/bfa_fcbuild.h @@ -127,15 +127,6 @@ struct fc_templates_s { void fcbuild_init(void); -u16 fc_flogi_build(struct fchs_s *fchs, struct fc_logi_s *flogi, - u32 s_id, u16 ox_id, wwn_t port_name, wwn_t node_name, - u16 pdu_size, u8 set_npiv, u8 set_auth, - u16 local_bb_credits); - -u16 fc_fdisc_build(struct fchs_s *buf, struct fc_logi_s *flogi, u32 s_id, - u16 ox_id, wwn_t port_name, wwn_t node_name, - u16 pdu_size); - u16 fc_flogi_acc_build(struct fchs_s *fchs, struct fc_logi_s *flogi, u32 s_id, __be16 ox_id, wwn_t port_name, wwn_t node_name, @@ -148,14 +139,8 @@ u16 fc_plogi_build(struct fchs_s *fchs, void *pld, u32 d_id, enum fc_parse_status fc_plogi_parse(struct fchs_s *fchs); -u16 fc_abts_build(struct fchs_s *buf, u32 d_id, u32 s_id, - u16 ox_id); - enum fc_parse_status fc_abts_rsp_parse(struct fchs_s *buf, int len); -u16 fc_rrq_build(struct fchs_s *buf, struct fc_rrq_s *rrq, u32 d_id, - u32 s_id, u16 ox_id, u16 rrq_oxid); - u16 fc_rspnid_build(struct fchs_s *fchs, void *pld, u32 s_id, u16 ox_id, u8 *name); u16 fc_rsnn_nn_build(struct fchs_s *fchs, void *pld, u32 s_id, @@ -164,10 +149,6 @@ u16 fc_rsnn_nn_build(struct fchs_s *fchs, void *pld, u32 s_id, u16 fc_rftid_build(struct fchs_s *fchs, void *pld, u32 s_id, u16 ox_id, enum bfa_lport_role role); -u16 fc_rftid_build_sol(struct fchs_s *fchs, void *pyld, u32 s_id, - u16 ox_id, u8 *fc4_bitmap, - u32 bitmap_size); - u16 fc_rffid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, u8 fc4_type, u8 fc4_ftrs); @@ -216,10 +197,6 @@ u16 fc_prli_acc_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, __be16 ox_id, enum bfa_lport_role role); -u16 fc_rnid_build(struct fchs_s *fchs, struct fc_rnid_cmd_s *rnid, - u32 d_id, u32 s_id, u16 ox_id, - u32 data_format); - u16 fc_rnid_acc_build(struct fchs_s *fchs, struct fc_rnid_acc_s *rnid_acc, u32 d_id, u32 s_id, __be16 ox_id, u32 data_format, @@ -228,29 +205,15 @@ u16 fc_rnid_acc_build(struct fchs_s *fchs, u16 fc_rpsc2_build(struct fchs_s *fchs, struct fc_rpsc2_cmd_s *rps2c, u32 d_id, u32 s_id, u32 *pid_list, u16 npids); -u16 fc_rpsc_build(struct fchs_s *fchs, struct fc_rpsc_cmd_s *rpsc, - u32 d_id, u32 s_id, u16 ox_id); u16 fc_rpsc_acc_build(struct fchs_s *fchs, struct fc_rpsc_acc_s *rpsc_acc, u32 d_id, u32 s_id, __be16 ox_id, struct fc_rpsc_speed_info_s *oper_speed); u16 fc_gid_ft_build(struct fchs_s *fchs, void *pld, u32 s_id, u8 fc4_type); -u16 fc_rpnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, - u32 port_id, wwn_t port_name); - u16 fc_rnnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u32 port_id, wwn_t node_name); -u16 fc_rcsid_build(struct fchs_s *fchs, void *pyld, u32 s_id, - u32 port_id, u32 cos); - -u16 fc_rptid_build(struct fchs_s *fchs, void *pyld, u32 s_id, - u32 port_id, u8 port_type); - -u16 fc_ganxt_build(struct fchs_s *fchs, void *pyld, u32 s_id, - u32 port_id); - u16 fc_logo_build(struct fchs_s *fchs, struct fc_logo_s *logo, u32 d_id, u32 s_id, u16 ox_id, wwn_t port_name); @@ -280,33 +243,15 @@ u16 fc_ba_acc_build(struct fchs_s *fchs, struct fc_ba_acc_s *ba_acc, u32 d_id, int fc_logout_params_pages(struct fchs_s *fc_frame, u8 els_code); -u16 fc_tprlo_acc_build(struct fchs_s *fchs, struct fc_tprlo_acc_s *tprlo_acc, - u32 d_id, u32 s_id, __be16 ox_id, int num_pages); - u16 fc_prlo_acc_build(struct fchs_s *fchs, struct fc_prlo_acc_s *prlo_acc, u32 d_id, u32 s_id, __be16 ox_id, int num_pages); -u16 fc_pdisc_build(struct fchs_s *fchs, u32 d_id, u32 s_id, - u16 ox_id, wwn_t port_name, wwn_t node_name, - u16 pdu_size); - u16 fc_pdisc_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name); -u16 fc_prlo_build(struct fchs_s *fchs, u32 d_id, u32 s_id, - u16 ox_id, int num_pages); - u16 fc_tprlo_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id, int num_pages, enum fc_tprlo_type tprlo_type, u32 tpr_id); -u16 fc_ba_rjt_build(struct fchs_s *fchs, u32 d_id, u32 s_id, - __be16 ox_id, u32 reason_code, u32 reason_expl); - -u16 fc_gnnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, - u32 port_id); - u16 fc_ct_rsp_parse(struct ct_hdr_s *cthdr); -u16 fc_rscn_build(struct fchs_s *fchs, struct fc_rscn_pl_s *rscn, u32 s_id, - u16 ox_id); #endif From 575143abcbbc7e291e5ef5c4fc031d94f8501918 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 17 Nov 2024 13:52:15 +0000 Subject: [PATCH 042/366] scsi: bfa: Remove unused parsers bfa has a set of structure parsers, of which quite a few are unused. Remove the unused set. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20241117135215.38771-3-linux@treblig.org Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcbuild.c | 134 --------------------------------- drivers/scsi/bfa/bfa_fcbuild.h | 17 ----- 2 files changed, 151 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcbuild.c b/drivers/scsi/bfa/bfa_fcbuild.c index 0f5d8a9cdf143..c44fd096ee688 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.c +++ b/drivers/scsi/bfa/bfa_fcbuild.c @@ -259,40 +259,6 @@ fc_plogi_acc_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, node_name, pdu_size, bb_cr, FC_ELS_ACC); } -enum fc_parse_status -fc_plogi_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name) -{ - struct fc_els_cmd_s *els_cmd = (struct fc_els_cmd_s *) (fchs + 1); - struct fc_logi_s *plogi; - struct fc_ls_rjt_s *ls_rjt; - - switch (els_cmd->els_code) { - case FC_ELS_LS_RJT: - ls_rjt = (struct fc_ls_rjt_s *) (fchs + 1); - if (ls_rjt->reason_code == FC_LS_RJT_RSN_LOGICAL_BUSY) - return FC_PARSE_BUSY; - else - return FC_PARSE_FAILURE; - case FC_ELS_ACC: - plogi = (struct fc_logi_s *) (fchs + 1); - if (len < sizeof(struct fc_logi_s)) - return FC_PARSE_FAILURE; - - if (!wwn_is_equal(plogi->port_name, port_name)) - return FC_PARSE_FAILURE; - - if (!plogi->class3.class_valid) - return FC_PARSE_FAILURE; - - if (be16_to_cpu(plogi->class3.rxsz) < (FC_MIN_PDUSZ)) - return FC_PARSE_FAILURE; - - return FC_PARSE_OK; - default: - return FC_PARSE_FAILURE; - } -} - enum fc_parse_status fc_plogi_parse(struct fchs_s *fchs) { @@ -365,21 +331,6 @@ fc_prli_rsp_parse(struct fc_prli_s *prli, int len) return FC_PARSE_OK; } -enum fc_parse_status -fc_prli_parse(struct fc_prli_s *prli) -{ - if (prli->parampage.type != FC_TYPE_FCP) - return FC_PARSE_FAILURE; - - if (!prli->parampage.imagepair) - return FC_PARSE_FAILURE; - - if (!prli->parampage.servparams.initiator) - return FC_PARSE_FAILURE; - - return FC_PARSE_OK; -} - u16 fc_logo_build(struct fchs_s *fchs, struct fc_logo_s *logo, u32 d_id, u32 s_id, u16 ox_id, wwn_t port_name) @@ -450,55 +401,6 @@ fc_adisc_rsp_parse(struct fc_adisc_s *adisc, int len, wwn_t port_name, return FC_PARSE_OK; } -enum fc_parse_status -fc_adisc_parse(struct fchs_s *fchs, void *pld, u32 host_dap, wwn_t node_name, - wwn_t port_name) -{ - struct fc_adisc_s *adisc = (struct fc_adisc_s *) pld; - - if (adisc->els_cmd.els_code != FC_ELS_ACC) - return FC_PARSE_FAILURE; - - if ((adisc->nport_id == (host_dap)) - && wwn_is_equal(adisc->orig_port_name, port_name) - && wwn_is_equal(adisc->orig_node_name, node_name)) - return FC_PARSE_OK; - - return FC_PARSE_FAILURE; -} - -enum fc_parse_status -fc_pdisc_parse(struct fchs_s *fchs, wwn_t node_name, wwn_t port_name) -{ - struct fc_logi_s *pdisc = (struct fc_logi_s *) (fchs + 1); - - if (pdisc->class3.class_valid != 1) - return FC_PARSE_FAILURE; - - if ((be16_to_cpu(pdisc->class3.rxsz) < - (FC_MIN_PDUSZ - sizeof(struct fchs_s))) - || (pdisc->class3.rxsz == 0)) - return FC_PARSE_FAILURE; - - if (!wwn_is_equal(pdisc->port_name, port_name)) - return FC_PARSE_FAILURE; - - if (!wwn_is_equal(pdisc->node_name, node_name)) - return FC_PARSE_FAILURE; - - return FC_PARSE_OK; -} - -enum fc_parse_status -fc_abts_rsp_parse(struct fchs_s *fchs, int len) -{ - if ((fchs->cat_info == FC_CAT_BA_ACC) - || (fchs->cat_info == FC_CAT_BA_RJT)) - return FC_PARSE_OK; - - return FC_PARSE_FAILURE; -} - u16 fc_logo_acc_build(struct fchs_s *fchs, void *pld, u32 d_id, u32 s_id, __be16 ox_id) @@ -666,29 +568,6 @@ fc_rpsc_acc_build(struct fchs_s *fchs, struct fc_rpsc_acc_s *rpsc_acc, return sizeof(struct fc_rpsc_acc_s); } -u16 -fc_pdisc_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name) -{ - struct fc_logi_s *pdisc = (struct fc_logi_s *) (fchs + 1); - - if (len < sizeof(struct fc_logi_s)) - return FC_PARSE_LEN_INVAL; - - if (pdisc->els_cmd.els_code != FC_ELS_ACC) - return FC_PARSE_ACC_INVAL; - - if (!wwn_is_equal(pdisc->port_name, port_name)) - return FC_PARSE_PWWN_NOT_EQUAL; - - if (!pdisc->class3.class_valid) - return FC_PARSE_NWWN_NOT_EQUAL; - - if (be16_to_cpu(pdisc->class3.rxsz) < (FC_MIN_PDUSZ)) - return FC_PARSE_RXSZ_INVAL; - - return FC_PARSE_OK; -} - static void fc_gs_cthdr_build(struct ct_hdr_s *cthdr, u32 s_id, u16 cmd_code) { @@ -752,19 +631,6 @@ fc_gpnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, return sizeof(fcgs_gpnid_req_t) + sizeof(struct ct_hdr_s); } -u16 -fc_ct_rsp_parse(struct ct_hdr_s *cthdr) -{ - if (be16_to_cpu(cthdr->cmd_rsp_code) != CT_RSP_ACCEPT) { - if (cthdr->reason_code == CT_RSN_LOGICAL_BUSY) - return FC_PARSE_BUSY; - else - return FC_PARSE_FAILURE; - } - - return FC_PARSE_OK; -} - u16 fc_gs_rjt_build(struct fchs_s *fchs, struct ct_hdr_s *cthdr, u32 d_id, u32 s_id, u16 ox_id, u8 reason_code, diff --git a/drivers/scsi/bfa/bfa_fcbuild.h b/drivers/scsi/bfa/bfa_fcbuild.h index 26646106da4c5..51da37b2ae6b5 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.h +++ b/drivers/scsi/bfa/bfa_fcbuild.h @@ -139,8 +139,6 @@ u16 fc_plogi_build(struct fchs_s *fchs, void *pld, u32 d_id, enum fc_parse_status fc_plogi_parse(struct fchs_s *fchs); -enum fc_parse_status fc_abts_rsp_parse(struct fchs_s *buf, int len); - u16 fc_rspnid_build(struct fchs_s *fchs, void *pld, u32 s_id, u16 ox_id, u8 *name); u16 fc_rsnn_nn_build(struct fchs_s *fchs, void *pld, u32 s_id, @@ -174,9 +172,6 @@ u16 fc_adisc_build(struct fchs_s *fchs, struct fc_adisc_s *adisc, u32 d_id, u32 s_id, __be16 ox_id, wwn_t port_name, wwn_t node_name); -enum fc_parse_status fc_adisc_parse(struct fchs_s *fchs, void *pld, - u32 host_dap, wwn_t node_name, wwn_t port_name); - enum fc_parse_status fc_adisc_rsp_parse(struct fc_adisc_s *adisc, int len, wwn_t port_name, wwn_t node_name); @@ -230,14 +225,6 @@ void fc_get_fc4type_bitmask(u8 fc4_type, u8 *bit_mask); void fc_els_req_build(struct fchs_s *fchs, u32 d_id, u32 s_id, __be16 ox_id); -enum fc_parse_status fc_plogi_rsp_parse(struct fchs_s *fchs, int len, - wwn_t port_name); - -enum fc_parse_status fc_prli_parse(struct fc_prli_s *prli); - -enum fc_parse_status fc_pdisc_parse(struct fchs_s *fchs, wwn_t node_name, - wwn_t port_name); - u16 fc_ba_acc_build(struct fchs_s *fchs, struct fc_ba_acc_s *ba_acc, u32 d_id, u32 s_id, __be16 ox_id, u16 rx_id); @@ -246,12 +233,8 @@ int fc_logout_params_pages(struct fchs_s *fc_frame, u8 els_code); u16 fc_prlo_acc_build(struct fchs_s *fchs, struct fc_prlo_acc_s *prlo_acc, u32 d_id, u32 s_id, __be16 ox_id, int num_pages); -u16 fc_pdisc_rsp_parse(struct fchs_s *fchs, int len, wwn_t port_name); - u16 fc_tprlo_build(struct fchs_s *fchs, u32 d_id, u32 s_id, u16 ox_id, int num_pages, enum fc_tprlo_type tprlo_type, u32 tpr_id); -u16 fc_ct_rsp_parse(struct ct_hdr_s *cthdr); - #endif From 5fe4e16fdbb8ca7dd6daea0031a0a84e6eb1e18d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 19 Nov 2024 19:28:56 +0100 Subject: [PATCH 043/366] scsi: ufs: Switch back to struct platform_driver::remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all platform drivers below drivers/ufs to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20241119182856.55743-2-u.kleine-koenig@baylibre.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/cdns-pltfrm.c | 2 +- drivers/ufs/host/tc-dwc-g210-pltfrm.c | 2 +- drivers/ufs/host/ti-j721e-ufs.c | 2 +- drivers/ufs/host/ufs-exynos.c | 2 +- drivers/ufs/host/ufs-hisi.c | 2 +- drivers/ufs/host/ufs-mediatek.c | 2 +- drivers/ufs/host/ufs-qcom.c | 2 +- drivers/ufs/host/ufs-renesas.c | 2 +- drivers/ufs/host/ufs-sprd.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/ufs/host/cdns-pltfrm.c b/drivers/ufs/host/cdns-pltfrm.c index 66811d8d1929c..c80f770a62854 100644 --- a/drivers/ufs/host/cdns-pltfrm.c +++ b/drivers/ufs/host/cdns-pltfrm.c @@ -321,7 +321,7 @@ static const struct dev_pm_ops cdns_ufs_dev_pm_ops = { static struct platform_driver cdns_ufs_pltfrm_driver = { .probe = cdns_ufs_pltfrm_probe, - .remove_new = cdns_ufs_pltfrm_remove, + .remove = cdns_ufs_pltfrm_remove, .driver = { .name = "cdns-ufshcd", .pm = &cdns_ufs_dev_pm_ops, diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c index a3877592604d5..9bfaa36cc8986 100644 --- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c +++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c @@ -89,7 +89,7 @@ static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = { static struct platform_driver tc_dwc_g210_pltfm_driver = { .probe = tc_dwc_g210_pltfm_probe, - .remove_new = tc_dwc_g210_pltfm_remove, + .remove = tc_dwc_g210_pltfm_remove, .driver = { .name = "tc-dwc-g210-pltfm", .pm = &tc_dwc_g210_pltfm_pm_ops, diff --git a/drivers/ufs/host/ti-j721e-ufs.c b/drivers/ufs/host/ti-j721e-ufs.c index 250c22df000d5..21214e5d58964 100644 --- a/drivers/ufs/host/ti-j721e-ufs.c +++ b/drivers/ufs/host/ti-j721e-ufs.c @@ -83,7 +83,7 @@ MODULE_DEVICE_TABLE(of, ti_j721e_ufs_of_match); static struct platform_driver ti_j721e_ufs_driver = { .probe = ti_j721e_ufs_probe, - .remove_new = ti_j721e_ufs_remove, + .remove = ti_j721e_ufs_remove, .driver = { .name = "ti-j721e-ufs", .of_match_table = ti_j721e_ufs_of_match, diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 91827b3e582bb..4a9173cf44041 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -2166,7 +2166,7 @@ static const struct dev_pm_ops exynos_ufs_pm_ops = { static struct platform_driver exynos_ufs_pltform = { .probe = exynos_ufs_probe, - .remove_new = exynos_ufs_remove, + .remove = exynos_ufs_remove, .driver = { .name = "exynos-ufshc", .pm = &exynos_ufs_pm_ops, diff --git a/drivers/ufs/host/ufs-hisi.c b/drivers/ufs/host/ufs-hisi.c index 5ee73ff052512..494f593702a34 100644 --- a/drivers/ufs/host/ufs-hisi.c +++ b/drivers/ufs/host/ufs-hisi.c @@ -590,7 +590,7 @@ static const struct dev_pm_ops ufs_hisi_pm_ops = { static struct platform_driver ufs_hisi_pltform = { .probe = ufs_hisi_probe, - .remove_new = ufs_hisi_remove, + .remove = ufs_hisi_remove, .driver = { .name = "ufshcd-hisi", .pm = &ufs_hisi_pm_ops, diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 06ab1e5e8b6fb..6fc848d0ada8b 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1962,7 +1962,7 @@ static const struct dev_pm_ops ufs_mtk_pm_ops = { static struct platform_driver ufs_mtk_pltform = { .probe = ufs_mtk_probe, - .remove_new = ufs_mtk_remove, + .remove = ufs_mtk_remove, .driver = { .name = "ufshcd-mtk", .pm = &ufs_mtk_pm_ops, diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 3b592492e1520..d2b8d97b480eb 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1897,7 +1897,7 @@ static const struct dev_pm_ops ufs_qcom_pm_ops = { static struct platform_driver ufs_qcom_pltform = { .probe = ufs_qcom_probe, - .remove_new = ufs_qcom_remove, + .remove = ufs_qcom_remove, .driver = { .name = "ufshcd-qcom", .pm = &ufs_qcom_pm_ops, diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c index 3ff97112e1f6d..f404019dc5d96 100644 --- a/drivers/ufs/host/ufs-renesas.c +++ b/drivers/ufs/host/ufs-renesas.c @@ -404,7 +404,7 @@ static void ufs_renesas_remove(struct platform_device *pdev) static struct platform_driver ufs_renesas_platform = { .probe = ufs_renesas_probe, - .remove_new = ufs_renesas_remove, + .remove = ufs_renesas_remove, .driver = { .name = "ufshcd-renesas", .of_match_table = of_match_ptr(ufs_renesas_of_match), diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c index d8b165908809d..b1ffb9b05fa75 100644 --- a/drivers/ufs/host/ufs-sprd.c +++ b/drivers/ufs/host/ufs-sprd.c @@ -442,7 +442,7 @@ static const struct dev_pm_ops ufs_sprd_pm_ops = { static struct platform_driver ufs_sprd_pltform = { .probe = ufs_sprd_probe, - .remove_new = ufs_sprd_remove, + .remove = ufs_sprd_remove, .driver = { .name = "ufshcd-sprd", .pm = &ufs_sprd_pm_ops, From 2c354d12307e58a748ade2802b4d26fd0d8c34a9 Mon Sep 17 00:00:00 2001 From: liujing Date: Fri, 8 Nov 2024 15:09:35 +0800 Subject: [PATCH 044/366] scsi: lpfc: Fix spelling errors 'asynchronously' Signed-off-by: liujing Link: https://lore.kernel.org/r/20241108070935.10427-1-liujing@cmss.chinamobile.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6e0b27460c24b..240cbb5c13f01 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -242,7 +242,7 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) * @phba: pointer to lpfc hba data structure. * @axchg: pointer to exchange context for the NVME LS request * - * This routine is used for processing an asychronously received NVME LS + * This routine is used for processing an asynchronously received NVME LS * request. Any remaining validation is done and the LS is then forwarded * to the nvme-fc transport via nvme_fc_rcv_ls_req(). * diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 3b29d57d77ac5..49e1a9defefc3 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -2142,7 +2142,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) * @phba: pointer to lpfc hba data structure. * @axchg: pointer to exchange context for the NVME LS request * - * This routine is used for processing an asychronously received NVME LS + * This routine is used for processing an asynchronously received NVME LS * request. Any remaining validation is done and the LS is then forwarded * to the nvmet-fc transport via nvmet_fc_rcv_ls_req(). * From 50740f4dc78b41dec7c8e39772619d5ba841ddd7 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Mon, 23 Sep 2024 19:48:33 +0200 Subject: [PATCH 045/366] scsi: megaraid_sas: Fix for a potential deadlock This fixes a 'possible circular locking dependency detected' warning CPU0 CPU1 ---- ---- lock(&instance->reset_mutex); lock(&shost->scan_mutex); lock(&instance->reset_mutex); lock(&shost->scan_mutex); Fix this by temporarily releasing the reset_mutex. Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20240923174833.45345-1-thenzl@redhat.com Acked-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 4ecf5284c0fc3..6e7e83eaef60f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -8907,8 +8907,11 @@ megasas_aen_polling(struct work_struct *work) (ld_target_id / MEGASAS_MAX_DEV_PER_CHANNEL), (ld_target_id % MEGASAS_MAX_DEV_PER_CHANNEL), 0); - if (sdev1) + if (sdev1) { + mutex_unlock(&instance->reset_mutex); megasas_remove_scsi_device(sdev1); + mutex_lock(&instance->reset_mutex); + } event_type = SCAN_VD_CHANNEL; break; From 5cd3167a5ebbbe49516f29e5dd16317ab6ff479a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 11 Nov 2024 14:10:56 +0100 Subject: [PATCH 046/366] scsi: target: tcmu: Constify some structures 'struct nla_policy' and 'struct match_table_t' are not modified in this driver. Constifying these structures moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers, which is the case of struct nla_policy. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 93188 6933 338 100459 1886b drivers/target/target_core_user.o After: ===== text data bss dec hex filename 93508 6581 338 100427 1884b drivers/target/target_core_user.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f83cd8469cc17391178e1181e8c26c4c1fb6028f.1731330634.git.christophe.jaillet@wanadoo.fr Reviewed-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 7eb94894bd68f..6b55ef38fff08 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -361,7 +361,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { [TCMU_MCGRP_CONFIG] = { .name = "config", }, }; -static struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = { +static const struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX + 1] = { [TCMU_ATTR_DEVICE] = { .type = NLA_STRING }, [TCMU_ATTR_MINOR] = { .type = NLA_U32 }, [TCMU_ATTR_CMD_STATUS] = { .type = NLA_S32 }, @@ -2430,7 +2430,7 @@ enum { Opt_cmd_ring_size_mb, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%s"}, {Opt_hw_block_size, "hw_block_size=%d"}, From 60b4dd1460f6d65739acb0f28d12bd9abaeb34b4 Mon Sep 17 00:00:00 2001 From: Ziqi Chen Date: Tue, 19 Nov 2024 17:56:04 +0800 Subject: [PATCH 047/366] scsi: ufs: core: Add ufshcd_send_bsg_uic_cmd() for UFS BSG User layer applications can send UIC GET/SET commands via the BSG framework, and if the user layer application sends a UIC SET command to the PA_PWRMODE attribute, a power mode change shall be initiated in UniPro and two interrupts shall be triggered if the power mode is successfully changed, i.e., UIC Command Completion interrupt and UIC Power Mode interrupt. The current UFS BSG code calls ufshcd_send_uic_cmd() directly, with which the second interrupt, i.e., UIC Power Mode interrupt, shall be treated as unhandled interrupt. In addition, after the UIC command is completed, user layer application has to poll UniPro and/or M-PHY state machine to confirm the power mode change is finished. Add a new wrapper function ufshcd_send_bsg_uic_cmd() and call it from ufs_bsg_request() so that if a UIC SET command is targeting the PA_PWRMODE attribute it can be redirected to ufshcd_uic_pwr_ctrl(). Fixes: e77044c5a842 ("scsi: ufs-bsg: Add support for uic commands in ufs_bsg_request()") Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Ziqi Chen Link: https://lore.kernel.org/r/20241119095613.121385-1-quic_ziqichen@quicinc.com Reviewed-by: Bean Huo Reviewed-by: Avri Altman Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs_bsg.c | 2 +- drivers/ufs/core/ufshcd-priv.h | 1 + drivers/ufs/core/ufshcd.c | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufs_bsg.c b/drivers/ufs/core/ufs_bsg.c index 433d0480391ea..6c09d97ae0065 100644 --- a/drivers/ufs/core/ufs_bsg.c +++ b/drivers/ufs/core/ufs_bsg.c @@ -170,7 +170,7 @@ static int ufs_bsg_request(struct bsg_job *job) break; case UPIU_TRANSACTION_UIC_CMD: memcpy(&uc, &bsg_request->upiu_req.uc, UIC_CMD_SIZE); - ret = ufshcd_send_uic_cmd(hba, &uc); + ret = ufshcd_send_bsg_uic_cmd(hba, &uc); if (ret) dev_err(hba->dev, "send uic cmd: error code %d\n", ret); diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 7aea8fbaeee88..9ffd94ddf8c7c 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -84,6 +84,7 @@ int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, u8 **buf, bool ascii); int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd); +int ufshcd_send_bsg_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd); int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, struct utp_upiu_req *req_upiu, diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8135a6dad7c6f..dd31aa7c67856 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4319,6 +4319,42 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) return ret; } +/** + * ufshcd_send_bsg_uic_cmd - Send UIC commands requested via BSG layer and retrieve the result + * @hba: per adapter instance + * @uic_cmd: UIC command + * + * Return: 0 only if success. + */ +int ufshcd_send_bsg_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) +{ + int ret; + + if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) + return 0; + + ufshcd_hold(hba); + + if (uic_cmd->argument1 == UIC_ARG_MIB(PA_PWRMODE) && + uic_cmd->command == UIC_CMD_DME_SET) { + ret = ufshcd_uic_pwr_ctrl(hba, uic_cmd); + goto out; + } + + mutex_lock(&hba->uic_cmd_mutex); + ufshcd_add_delay_before_dme_cmd(hba); + + ret = __ufshcd_send_uic_cmd(hba, uic_cmd); + if (!ret) + ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); + + mutex_unlock(&hba->uic_cmd_mutex); + +out: + ufshcd_release(hba); + return ret; +} + /** * ufshcd_uic_change_pwr_mode - Perform the UIC power mode chage * using DME_SET primitives. From 1695c4361d35b7bdadd7b34f99c9c07741e181e5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 11 Nov 2024 23:18:30 +0530 Subject: [PATCH 048/366] scsi: ufs: core: Cancel RTC work during ufshcd_remove() Currently, RTC work is only cancelled during __ufshcd_wl_suspend(). When ufshcd is removed in ufshcd_remove(), RTC work is not cancelled. Due to this, any further trigger of the RTC work after ufshcd_remove() would result in a NULL pointer dereference as below: Unable to handle kernel NULL pointer dereference at virtual address 00000000000002a4 Workqueue: events ufshcd_rtc_work Call trace: _raw_spin_lock_irqsave+0x34/0x8c pm_runtime_get_if_active+0x24/0xb4 ufshcd_rtc_work+0x124/0x19c process_scheduled_works+0x18c/0x2d8 worker_thread+0x144/0x280 kthread+0x11c/0x128 ret_from_fork+0x10/0x20 Since RTC work accesses the ufshcd internal structures, it should be cancelled when ufshcd is removed. So do that in ufshcd_remove(), as per the order in ufshcd_init(). Cc: stable@vger.kernel.org # 6.8 Fixes: 6bf999e0eb41 ("scsi: ufs: core: Add UFS RTC support") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-1-45ad8b62f02e@linaro.org Reviewed-by: Peter Wang Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e338867bc96ca..216d1ed60d1af 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10189,6 +10189,7 @@ void ufshcd_remove(struct ufs_hba *hba) ufs_hwmon_remove(hba); ufs_bsg_remove(hba); ufs_sysfs_remove_nodes(hba->dev); + cancel_delayed_work_sync(&hba->ufs_rtc_update_work); blk_mq_destroy_queue(hba->tmf_queue); blk_put_queue(hba->tmf_queue); blk_mq_free_tag_set(&hba->tmf_tag_set); From 64506b3d23a337e98a74b18dcb10c8619365f2bd Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 11 Nov 2024 23:18:31 +0530 Subject: [PATCH 049/366] scsi: ufs: qcom: Only free platform MSIs when ESI is enabled Otherwise, it will result in a NULL pointer dereference as below: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: mutex_lock+0xc/0x54 platform_device_msi_free_irqs_all+0x14/0x20 ufs_qcom_remove+0x34/0x48 [ufs_qcom] platform_remove+0x28/0x44 device_remove+0x4c/0x80 device_release_driver_internal+0xd8/0x178 driver_detach+0x50/0x9c bus_remove_driver+0x6c/0xbc driver_unregister+0x30/0x60 platform_driver_unregister+0x14/0x20 ufs_qcom_pltform_exit+0x18/0xb94 [ufs_qcom] __arm64_sys_delete_module+0x180/0x260 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0xc0/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xdc el0t_64_sync_handler+0xc0/0xc4 el0t_64_sync+0x190/0x194 Cc: stable@vger.kernel.org # 6.3 Fixes: 519b6274a777 ("scsi: ufs: qcom: Add MCQ ESI config vendor specific ops") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-2-45ad8b62f02e@linaro.org Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 3b592492e1520..5220ec78021d6 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1861,10 +1861,12 @@ static int ufs_qcom_probe(struct platform_device *pdev) static void ufs_qcom_remove(struct platform_device *pdev) { struct ufs_hba *hba = platform_get_drvdata(pdev); + struct ufs_qcom_host *host = ufshcd_get_variant(hba); pm_runtime_get_sync(&(pdev)->dev); ufshcd_remove(hba); - platform_device_msi_free_irqs_all(hba->dev); + if (host->esi_enabled) + platform_device_msi_free_irqs_all(hba->dev); } static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = { From d3326e6a3f9bf1e075be2201fb704c2fdf19e2b7 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 11 Nov 2024 23:18:32 +0530 Subject: [PATCH 050/366] scsi: ufs: pltfrm: Disable runtime PM during removal of glue drivers When the UFSHCD platform glue drivers are removed, runtime PM should be disabled using pm_runtime_disable() to balance the enablement done in ufshcd_pltfrm_init(). This is also reported by PM core when the glue driver is removed and inserted again: ufshcd-qcom 1d84000.ufshc: Unbalanced pm_runtime_enable! So disable runtime PM using a new helper API ufshcd_pltfrm_remove(), that also takes care of removing ufshcd. This helper should be called during the remove() stage of glue drivers. Cc: stable@vger.kernel.org # 3.12 Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-3-45ad8b62f02e@linaro.org Reviewed-by: Peter Wang Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/cdns-pltfrm.c | 4 +--- drivers/ufs/host/tc-dwc-g210-pltfrm.c | 4 +--- drivers/ufs/host/ufs-exynos.c | 2 +- drivers/ufs/host/ufs-hisi.c | 4 +--- drivers/ufs/host/ufs-mediatek.c | 4 +--- drivers/ufs/host/ufs-qcom.c | 2 +- drivers/ufs/host/ufs-renesas.c | 4 +--- drivers/ufs/host/ufs-sprd.c | 4 +--- drivers/ufs/host/ufshcd-pltfrm.c | 13 +++++++++++++ drivers/ufs/host/ufshcd-pltfrm.h | 1 + 10 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/ufs/host/cdns-pltfrm.c b/drivers/ufs/host/cdns-pltfrm.c index 66811d8d1929c..b31aa84111511 100644 --- a/drivers/ufs/host/cdns-pltfrm.c +++ b/drivers/ufs/host/cdns-pltfrm.c @@ -307,9 +307,7 @@ static int cdns_ufs_pltfrm_probe(struct platform_device *pdev) */ static void cdns_ufs_pltfrm_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } static const struct dev_pm_ops cdns_ufs_dev_pm_ops = { diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c index a3877592604d5..113e0ef7b2cf8 100644 --- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c +++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c @@ -76,10 +76,8 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev) */ static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - pm_runtime_get_sync(&(pdev)->dev); - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = { diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 91827b3e582bb..b20f6526777a7 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1993,7 +1993,7 @@ static void exynos_ufs_remove(struct platform_device *pdev) struct exynos_ufs *ufs = ufshcd_get_variant(hba); pm_runtime_get_sync(&(pdev)->dev); - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); phy_power_off(ufs->phy); phy_exit(ufs->phy); diff --git a/drivers/ufs/host/ufs-hisi.c b/drivers/ufs/host/ufs-hisi.c index 5ee73ff052512..501609521b260 100644 --- a/drivers/ufs/host/ufs-hisi.c +++ b/drivers/ufs/host/ufs-hisi.c @@ -576,9 +576,7 @@ static int ufs_hisi_probe(struct platform_device *pdev) static void ufs_hisi_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } static const struct dev_pm_ops ufs_hisi_pm_ops = { diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 06ab1e5e8b6fb..b444146419dea 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1879,10 +1879,8 @@ static int ufs_mtk_probe(struct platform_device *pdev) */ static void ufs_mtk_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - pm_runtime_get_sync(&(pdev)->dev); - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 5220ec78021d6..3762337d75765 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1864,7 +1864,7 @@ static void ufs_qcom_remove(struct platform_device *pdev) struct ufs_qcom_host *host = ufshcd_get_variant(hba); pm_runtime_get_sync(&(pdev)->dev); - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); if (host->esi_enabled) platform_device_msi_free_irqs_all(hba->dev); } diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c index 3ff97112e1f6d..21a64b34397d8 100644 --- a/drivers/ufs/host/ufs-renesas.c +++ b/drivers/ufs/host/ufs-renesas.c @@ -397,9 +397,7 @@ static int ufs_renesas_probe(struct platform_device *pdev) static void ufs_renesas_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } static struct platform_driver ufs_renesas_platform = { diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c index d8b165908809d..e455890cf7d49 100644 --- a/drivers/ufs/host/ufs-sprd.c +++ b/drivers/ufs/host/ufs-sprd.c @@ -427,10 +427,8 @@ static int ufs_sprd_probe(struct platform_device *pdev) static void ufs_sprd_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - pm_runtime_get_sync(&(pdev)->dev); - ufshcd_remove(hba); + ufshcd_pltfrm_remove(pdev); } static const struct dev_pm_ops ufs_sprd_pm_ops = { diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index 1f4f30d6cb423..bad5b1303eb68 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -524,6 +524,19 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init); +/** + * ufshcd_pltfrm_remove - Remove ufshcd platform + * @pdev: pointer to Platform device handle + */ +void ufshcd_pltfrm_remove(struct platform_device *pdev) +{ + struct ufs_hba *hba = platform_get_drvdata(pdev); + + ufshcd_remove(hba); + pm_runtime_disable(&pdev->dev); +} +EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove); + MODULE_AUTHOR("Santosh Yaragnavi "); MODULE_AUTHOR("Vinayak Holikatti "); MODULE_DESCRIPTION("UFS host controller Platform bus based glue driver"); diff --git a/drivers/ufs/host/ufshcd-pltfrm.h b/drivers/ufs/host/ufshcd-pltfrm.h index df387be5216bd..3017f8e8f93c6 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.h +++ b/drivers/ufs/host/ufshcd-pltfrm.h @@ -31,6 +31,7 @@ int ufshcd_negotiate_pwr_params(const struct ufs_host_params *host_params, void ufshcd_init_host_params(struct ufs_host_params *host_params); int ufshcd_pltfrm_init(struct platform_device *pdev, const struct ufs_hba_variant_ops *vops); +void ufshcd_pltfrm_remove(struct platform_device *pdev); int ufshcd_populate_vreg(struct device *dev, const char *name, struct ufs_vreg **out_vreg, bool skip_current); From 1745dcdb7227102e16248a324c600b9121c8f6df Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 11 Nov 2024 23:18:33 +0530 Subject: [PATCH 051/366] scsi: ufs: pltfrm: Drop PM runtime reference count after ufshcd_remove() During the remove stage of glue drivers, some of them are incrementing the reference count using pm_runtime_get_sync(), before removing the ufshcd using ufshcd_remove(). But they are not dropping that reference count after ufshcd_remove() to balance the refcount. So drop the reference count by calling pm_runtime_put_noidle() after ufshcd_remove(). Since the behavior is applicable to all glue drivers, move the PM handling to ufshcd_pltfrm_remove(). Cc: stable@vger.kernel.org # 3.12 Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-4-45ad8b62f02e@linaro.org Reviewed-by: Peter Wang Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/tc-dwc-g210-pltfrm.c | 1 - drivers/ufs/host/ufs-exynos.c | 1 - drivers/ufs/host/ufs-mediatek.c | 1 - drivers/ufs/host/ufs-qcom.c | 1 - drivers/ufs/host/ufs-sprd.c | 1 - drivers/ufs/host/ufshcd-pltfrm.c | 2 ++ 6 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c index 113e0ef7b2cf8..c6f8565ede21a 100644 --- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c +++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c @@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev) */ static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev) { - pm_runtime_get_sync(&(pdev)->dev); ufshcd_pltfrm_remove(pdev); } diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index b20f6526777a7..9d4db13e142d5 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1992,7 +1992,6 @@ static void exynos_ufs_remove(struct platform_device *pdev) struct ufs_hba *hba = platform_get_drvdata(pdev); struct exynos_ufs *ufs = ufshcd_get_variant(hba); - pm_runtime_get_sync(&(pdev)->dev); ufshcd_pltfrm_remove(pdev); phy_power_off(ufs->phy); diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index b444146419dea..ffe4d03a0f383 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1879,7 +1879,6 @@ static int ufs_mtk_probe(struct platform_device *pdev) */ static void ufs_mtk_remove(struct platform_device *pdev) { - pm_runtime_get_sync(&(pdev)->dev); ufshcd_pltfrm_remove(pdev); } diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 3762337d75765..73b4fec8221a0 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1863,7 +1863,6 @@ static void ufs_qcom_remove(struct platform_device *pdev) struct ufs_hba *hba = platform_get_drvdata(pdev); struct ufs_qcom_host *host = ufshcd_get_variant(hba); - pm_runtime_get_sync(&(pdev)->dev); ufshcd_pltfrm_remove(pdev); if (host->esi_enabled) platform_device_msi_free_irqs_all(hba->dev); diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c index e455890cf7d49..d220978c2d8c8 100644 --- a/drivers/ufs/host/ufs-sprd.c +++ b/drivers/ufs/host/ufs-sprd.c @@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platform_device *pdev) static void ufs_sprd_remove(struct platform_device *pdev) { - pm_runtime_get_sync(&(pdev)->dev); ufshcd_pltfrm_remove(pdev); } diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index bad5b1303eb68..b8dadd0a2f4c0 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -532,8 +532,10 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) { struct ufs_hba *hba = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); ufshcd_remove(hba); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove); From 897df60c16d54ad515a3d0887edab5c63da06d1f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 11 Nov 2024 23:18:34 +0530 Subject: [PATCH 052/366] scsi: ufs: pltfrm: Dellocate HBA during ufshcd_pltfrm_remove() This will ensure that the scsi host is cleaned up properly using scsi_host_dev_release(). Otherwise, it may lead to memory leaks. Cc: stable@vger.kernel.org # 4.4 Fixes: 03b1781aa978 ("[SCSI] ufs: Add Platform glue driver for ufshcd") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-5-45ad8b62f02e@linaro.org Reviewed-by: Peter Wang Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pltfrm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index b8dadd0a2f4c0..505572d4fa878 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -534,6 +534,7 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); ufshcd_remove(hba); + ufshcd_dealloc_host(hba); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } From 45f69d091bab64a332fe751da9829dcd136348fd Mon Sep 17 00:00:00 2001 From: Long Li Date: Sat, 22 Jun 2024 16:26:31 +0800 Subject: [PATCH 053/366] xfs: eliminate lockdep false positives in xfs_attr_shortform_list xfs_attr_shortform_list() only called from a non-transactional context, it hold ilock before alloc memory and maybe trapped in memory reclaim. Since commit 204fae32d5f7("xfs: clean up remaining GFP_NOFS users") removed GFP_NOFS flag, lockdep warning will be report as [1]. Eliminate lockdep false positives by use __GFP_NOLOCKDEP to alloc memory in xfs_attr_shortform_list(). [1] https://lore.kernel.org/linux-xfs/000000000000e33add0616358204@google.com/ Reported-by: syzbot+4248e91deb3db78358a2@syzkaller.appspotmail.com Signed-off-by: Long Li Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_list.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 7db3863048751..379b48d015d25 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -114,7 +114,8 @@ xfs_attr_shortform_list( * It didn't all fit, so we have to sort everything on hashval. */ sbsize = sf->count * sizeof(*sbuf); - sbp = sbuf = kmalloc(sbsize, GFP_KERNEL | __GFP_NOFAIL); + sbp = sbuf = kmalloc(sbsize, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); /* * Scan the attribute list for the rest of the entries, storing From 652f03db897ba24f9c4b269e254ccc6cc01ff1b7 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 13 Nov 2024 17:17:15 +0800 Subject: [PATCH 054/366] xfs: remove unknown compat feature check in superblock write validation Compat features are new features that older kernels can safely ignore, allowing read-write mounts without issues. The current sb write validation implementation returns -EFSCORRUPTED for unknown compat features, preventing filesystem write operations and contradicting the feature's definition. Additionally, if the mounted image is unclean, the log recovery may need to write to the superblock. Returning an error for unknown compat features during sb write validation can cause mount failures. Although XFS currently does not use compat feature flags, this issue affects current kernels' ability to mount images that may use compat feature flags in the future. Since superblock read validation already warns about unknown compat features, it's unnecessary to repeat this warning during write validation. Therefore, the relevant code in write validation is being removed. Fixes: 9e037cb7972f ("xfs: check for unknown v5 feature bits in superblock write verifier") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_sb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e81b240b71583..a809513a290cf 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -326,13 +326,6 @@ xfs_validate_sb_write( * the kernel cannot support since we checked for unsupported bits in * the read verifier, which means that memory is corrupt. */ - if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { - xfs_warn(mp, -"Corruption detected in superblock compatible features (0x%x)!", - (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); - return -EFSCORRUPTED; - } - if (!xfs_is_readonly(mp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, From 13325333582d4820d39b9e8f63d6a54e745585d9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 13 Nov 2024 11:11:20 +0100 Subject: [PATCH 055/366] xfs: fix sparse inode limits on runt AG The runt AG at the end of a filesystem is almost always smaller than the mp->m_sb.sb_agblocks. Unfortunately, when setting the max_agbno limit for the inode chunk allocation, we do not take this into account. This means we can allocate a sparse inode chunk that overlaps beyond the end of an AG. When we go to allocate an inode from that sparse chunk, the irec fails validation because the agbno of the start of the irec is beyond valid limits for the runt AG. Prevent this from happening by taking into account the size of the runt AG when allocating inode chunks. Also convert the various checks for valid inode chunk agbnos to use xfs_ag_block_count() so that they will also catch such issues in the future. Fixes: 56d1115c9bc7 ("xfs: allocate sparse inode chunks on full chunk allocation failure") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ialloc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 8b84e2cf711b1..f3a840a425f51 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -853,7 +853,8 @@ xfs_ialloc_ag_alloc( * the end of the AG. */ args.min_agbno = args.mp->m_sb.sb_inoalignmt; - args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, + args.max_agbno = round_down(xfs_ag_block_count(args.mp, + pag_agno(pag)), args.mp->m_sb.sb_inoalignmt) - igeo->ialloc_blks; @@ -2349,9 +2350,9 @@ xfs_difree( return -EINVAL; } agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) { + xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).", + __func__, agbno, xfs_ag_block_count(mp, pag_agno(pag))); ASSERT(0); return -EINVAL; } @@ -2474,7 +2475,7 @@ xfs_imap( */ agino = XFS_INO_TO_AGINO(mp, ino); agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks || + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag)) || ino != xfs_agino_to_ino(pag, agino)) { error = -EINVAL; #ifdef DEBUG @@ -2484,11 +2485,12 @@ xfs_imap( */ if (flags & XFS_IGET_UNTRUSTED) return error; - if (agbno >= mp->m_sb.sb_agblocks) { + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) { xfs_alert(mp, "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", __func__, (unsigned long long)agbno, - (unsigned long)mp->m_sb.sb_agblocks); + (unsigned long)xfs_ag_block_count(mp, + pag_agno(pag))); } if (ino != xfs_agino_to_ino(pag, agino)) { xfs_alert(mp, From c9c293240e4351aa2678186cd88a08141fc6ce9e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 May 1996 22:44:44 +0000 Subject: [PATCH 056/366] xfs: delalloc and quota softlimit timers are incoherent I've been seeing this failure on during xfs/050 recently: XFS: Assertion failed: dst->d_spc_timer != 0, file: fs/xfs/xfs_qm_syscalls.c, line: 435 .... Call Trace: xfs_qm_scall_getquota_fill_qc+0x2a2/0x2b0 xfs_qm_scall_getquota_next+0x69/0xa0 xfs_fs_get_nextdqblk+0x62/0xf0 quota_getnextxquota+0xbf/0x320 do_quotactl+0x1a1/0x410 __se_sys_quotactl+0x126/0x310 __x64_sys_quotactl+0x21/0x30 x64_sys_call+0x2819/0x2ee0 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x76/0x7e It turns out that the _qmount call has silently been failing to unmount and mount the filesystem, so when the softlimit is pushed past with a buffered write, it is not getting synced to disk before the next quota report is being run. Hence when the quota report runs, we have 300 blocks of delalloc data on an inode, with a soft limit of 200 blocks. XFS dquots account delalloc reservations as used space, hence the dquot is over the soft limit. However, we don't update the soft limit timers until we do a transactional update of the dquot. That is, the dquot sits over the soft limit without a softlimit timer being started until writeback occurs and the allocation modifies the dquot and we call xfs_qm_adjust_dqtimers() from xfs_trans_apply_dquot_deltas() in xfs_trans_commit() context. This isn't really a problem, except for this debug code in xfs_qm_scall_getquota_fill_qc(): if (xfs_dquot_is_enforced(dqp) && dqp->q_id != 0) { if ((dst->d_space > dst->d_spc_softlimit) && (dst->d_spc_softlimit > 0)) { ASSERT(dst->d_spc_timer != 0); } .... It asserts taht if the used block count is over the soft limit, it *must* have a soft limit timer running. This is clearly not the case, because we haven't committed the delalloc space to disk yet. Hence the soft limit is only exceeded temporarily in memory (which isn't an issue) and we start the timer the moment we exceed the soft limit in journalled metadata. This debug was introduced in: commit 0d5ad8383061fbc0a9804fbb98218750000fe032 Author: Supriya Wickrematillake Date: Wed May 15 22:44:44 1996 +0000 initial checkin quotactl syscall functions. The very first quota support commit back in 1996. This is zero-day debug for Irix and, as it turns out, a zero-day bug in the debug code because the delalloc code on Irix didn't update the softlimit timers, either. IOWs, this issue has been in the code for 28 years. We obviously don't care if soft limit timers are a bit rubbery when we have delalloc reservations in memory. Production systems running quota reports have been exposed to this situation for 28 years and nobody has noticed it, so the debug code is essentially worthless at this point in time. We also have the on-disk dquot verifiers checking that the soft limit timer is running whenever the dquot is over the soft limit before we write it to disk and after we read it from disk. These aren't firing, so it is clear the issue is purely a temporary in-memory incoherency that I never would have noticed had the test not silently failed to unmount the filesystem. Hence I'm simply going to trash this runtime debug because it isn't useful in the slightest for catching quota bugs. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_qm_syscalls.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 4eda50ae2d1cb..0c78f30fa4a3f 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -427,19 +427,6 @@ xfs_qm_scall_getquota_fill_qc( dst->d_ino_timer = 0; dst->d_rt_spc_timer = 0; } - -#ifdef DEBUG - if (xfs_dquot_is_enforced(dqp) && dqp->q_id != 0) { - if ((dst->d_space > dst->d_spc_softlimit) && - (dst->d_spc_softlimit > 0)) { - ASSERT(dst->d_spc_timer != 0); - } - if ((dst->d_ino_count > dqp->q_ino.softlimit) && - (dqp->q_ino.softlimit > 0)) { - ASSERT(dst->d_ino_timer != 0); - } - } -#endif } /* Return the quota information for the dquot matching id. */ From a8581099604dfa609a34a3fac8ef5af0d300d2c1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 13 Nov 2024 11:11:40 +0100 Subject: [PATCH 057/366] xfs: prevent mount and log shutdown race I recently had an fstests hang where there were two internal tasks stuck like so: [ 6559.010870] task:kworker/24:45 state:D stack:12152 pid:631308 tgid:631308 ppid:2 flags:0x00004000 [ 6559.016984] Workqueue: xfs-buf/dm-2 xfs_buf_ioend_work [ 6559.020349] Call Trace: [ 6559.022002] [ 6559.023426] __schedule+0x650/0xb10 [ 6559.025734] schedule+0x6d/0xf0 [ 6559.027835] schedule_timeout+0x31/0x180 [ 6559.030582] wait_for_common+0x10c/0x1e0 [ 6559.033495] wait_for_completion+0x1d/0x30 [ 6559.036463] __flush_workqueue+0xeb/0x490 [ 6559.039479] ? mempool_alloc_slab+0x15/0x20 [ 6559.042537] xlog_cil_force_seq+0xa1/0x2f0 [ 6559.045498] ? bio_alloc_bioset+0x1d8/0x510 [ 6559.048578] ? submit_bio_noacct+0x2f2/0x380 [ 6559.051665] ? xlog_force_shutdown+0x3b/0x170 [ 6559.054819] xfs_log_force+0x77/0x230 [ 6559.057455] xlog_force_shutdown+0x3b/0x170 [ 6559.060507] xfs_do_force_shutdown+0xd4/0x200 [ 6559.063798] ? xfs_buf_rele+0x1bd/0x580 [ 6559.066541] xfs_buf_ioend_handle_error+0x163/0x2e0 [ 6559.070099] xfs_buf_ioend+0x61/0x200 [ 6559.072728] xfs_buf_ioend_work+0x15/0x20 [ 6559.075706] process_scheduled_works+0x1d4/0x400 [ 6559.078814] worker_thread+0x234/0x2e0 [ 6559.081300] kthread+0x147/0x170 [ 6559.083462] ? __pfx_worker_thread+0x10/0x10 [ 6559.086295] ? __pfx_kthread+0x10/0x10 [ 6559.088771] ret_from_fork+0x3e/0x50 [ 6559.091153] ? __pfx_kthread+0x10/0x10 [ 6559.093624] ret_from_fork_asm+0x1a/0x30 [ 6559.096227] [ 6559.109304] Workqueue: xfs-cil/dm-2 xlog_cil_push_work [ 6559.112673] Call Trace: [ 6559.114333] [ 6559.115760] __schedule+0x650/0xb10 [ 6559.118084] schedule+0x6d/0xf0 [ 6559.120175] schedule_timeout+0x31/0x180 [ 6559.122776] ? call_rcu+0xee/0x2f0 [ 6559.125034] __down_common+0xbe/0x1f0 [ 6559.127470] __down+0x1d/0x30 [ 6559.129458] down+0x48/0x50 [ 6559.131343] ? xfs_buf_item_unpin+0x8d/0x380 [ 6559.134213] xfs_buf_lock+0x3d/0xe0 [ 6559.136544] xfs_buf_item_unpin+0x8d/0x380 [ 6559.139253] xlog_cil_committed+0x287/0x520 [ 6559.142019] ? sched_clock+0x10/0x30 [ 6559.144384] ? sched_clock_cpu+0x10/0x190 [ 6559.147039] ? psi_group_change+0x48/0x310 [ 6559.149735] ? _raw_spin_unlock+0xe/0x30 [ 6559.152340] ? finish_task_switch+0xbc/0x310 [ 6559.155163] xlog_cil_process_committed+0x6d/0x90 [ 6559.158265] xlog_state_shutdown_callbacks+0x53/0x110 [ 6559.161564] ? xlog_cil_push_work+0xa70/0xaf0 [ 6559.164441] xlog_state_release_iclog+0xba/0x1b0 [ 6559.167483] xlog_cil_push_work+0xa70/0xaf0 [ 6559.170260] process_scheduled_works+0x1d4/0x400 [ 6559.173286] worker_thread+0x234/0x2e0 [ 6559.175779] kthread+0x147/0x170 [ 6559.177933] ? __pfx_worker_thread+0x10/0x10 [ 6559.180748] ? __pfx_kthread+0x10/0x10 [ 6559.183231] ret_from_fork+0x3e/0x50 [ 6559.185601] ? __pfx_kthread+0x10/0x10 [ 6559.188092] ret_from_fork_asm+0x1a/0x30 [ 6559.190692] This is an ABBA deadlock where buffer IO completion is triggering a forced shutdown with the buffer lock held. It is waiting for the CIL to flush as part of the log force. The CIL flush is blocked doing shutdown processing of all it's objects, trying to unpin a buffer item. That requires taking the buffer lock.... For the CIL to be doing shutdown processing, the log must be marked with XLOG_IO_ERROR, but that doesn't happen until after the log force is issued. Hence for xfs_do_force_shutdown() to be forcing the log on a shut down log, we must have had a racing xlog_force_shutdown and xfs_force_shutdown like so: p0 p1 CIL push xlog_force_shutdown xfs_log_force test_and_set_bit(XLOG_IO_ERROR) xlog_state_release_iclog() sees XLOG_IO_ERROR xlog_state_shutdown_callbacks .... xfs_buf_item_unpin xfs_buf_lock xfs_force_shutdown xfs_set_shutdown(mp) wins xlog_force_shutdown xfs_log_force xfs_set_shutdown(mp) fails The deadlock can be mitigated by avoiding the log force on the second pass through xlog_force_shutdown. Do this by adding another atomic state bit (XLOG_OP_PENDING_SHUTDOWN) that is set on entry to xlog_force_shutdown() but doesn't mark the log as shutdown. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 11 +++++++++++ fs/xfs/xfs_log_priv.h | 1 + 2 files changed, 12 insertions(+) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 26b2f5887b881..05daad8a8d34d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3455,6 +3455,16 @@ xlog_force_shutdown( if (!log) return false; + /* + * Ensure that there is only ever one log shutdown being processed. + * If we allow the log force below on a second pass after shutting + * down the log, we risk deadlocking the CIL push as it may require + * locks on objects the current shutdown context holds (e.g. taking + * buffer locks to abort buffers on last unpin of buf log items). + */ + if (test_and_set_bit(XLOG_SHUTDOWN_STARTED, &log->l_opstate)) + return false; + /* * Flush all the completed transactions to disk before marking the log * being shut down. We need to do this first as shutting down the log @@ -3487,6 +3497,7 @@ xlog_force_shutdown( spin_lock(&log->l_icloglock); if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) { spin_unlock(&log->l_icloglock); + ASSERT(0); return false; } spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b8778a4fd6b64..f3d78869e5e5a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -458,6 +458,7 @@ struct xlog { #define XLOG_IO_ERROR 2 /* log hit an I/O error, and being shutdown */ #define XLOG_TAIL_WARN 3 /* log tail verify warning issued */ +#define XLOG_SHUTDOWN_STARTED 4 /* xlog_force_shutdown() exclusion */ static inline bool xlog_recovery_needed(struct xlog *log) From 78ac1c3558810486d90aa533b0039aa70487a3da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 8 Nov 2024 09:29:48 +0100 Subject: [PATCH 058/366] dma-buf: fix dma_fence_array_signaled v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function silently assumed that signaling was already enabled for the dma_fence_array. This meant that without enabling signaling first we would never see forward progress. Fix that by falling back to testing each individual fence when signaling isn't enabled yet. v2: add the comment suggested by Boris why this is done this way v3: fix the underflow pointed out by Tvrtko v4: atomic_read_acquire() as suggested by Tvrtko Signed-off-by: Christian König Reviewed-by: Boris Brezillon Tested-by: Chia-I Wu Reviewed-by: Tvrtko Ursulin Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12094 Cc: Link: https://patchwork.freedesktop.org/patch/msgid/20241112121925.18464-1-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 8a08ffde31e75..6657d4b30af9d 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -103,10 +103,36 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence) static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); + int num_pending; + unsigned int i; - if (atomic_read(&array->num_pending) > 0) + /* + * We need to read num_pending before checking the enable_signal bit + * to avoid racing with the enable_signaling() implementation, which + * might decrement the counter, and cause a partial check. + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * dma_fence_array_enable_signaling() + * + * The !--num_pending check is here to account for the any_signaled case + * if we race with enable_signaling(), that means the !num_pending check + * in the is_signalling_enabled branch might be outdated (num_pending + * might have been decremented), but that's fine. The user will get the + * right value when testing again later. + */ + num_pending = atomic_read_acquire(&array->num_pending); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { + if (num_pending <= 0) + goto signal; return false; + } + + for (i = 0; i < array->num_fences; ++i) { + if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) + goto signal; + } + return false; +signal: dma_fence_array_clear_pending_error(array); return true; } From 8618f5ffba4d381610f6bb4c472a6148c2bfde96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 25 Nov 2024 20:53:07 +0100 Subject: [PATCH 059/366] bpf, lsm: Remove getlsmprop hooks BTF IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These hooks are not useful for BPF LSM currently. Furthermore a recent renaming introduced build warnings: BTFIDS vmlinux WARN: resolve_btfids: unresolved symbol bpf_lsm_task_getsecid_obj WARN: resolve_btfids: unresolved symbol bpf_lsm_current_getsecid_subj Link: https://lore.kernel.org/lkml/20241123-bpf_lsm_task_getsecid_obj-v1-1-0d0f94649e05@weissschuh.net/ Fixes: 37f670aacd48 ("lsm: use lsm_prop in security_current_getsecid") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241125-bpf_lsm_task_getsecid_obj-v2-1-c8395bde84e0@weissschuh.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_lsm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 3bc61628ab251..967492b65185f 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -375,8 +375,6 @@ BTF_ID(func, bpf_lsm_socket_socketpair) BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) -BTF_ID(func, bpf_lsm_current_getsecid_subj) -BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) BTF_ID(func, bpf_lsm_task_to_inode) From 9f0fc98145218ff8f50d8cfa3b393785056c53e1 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 18 Nov 2024 22:03:41 +0100 Subject: [PATCH 060/366] bpf, vsock: Fix poll() missing a queue When a verdict program simply passes a packet without redirection, sk_msg is enqueued on sk_psock::ingress_msg. Add a missing check to poll(). Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Link: https://lore.kernel.org/r/20241118-vsock-bpf-poll-close-v1-1-f1b9669cacdc@rbox.co Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend --- net/vmw_vsock/af_vsock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 25b28b1434f5e..725da7203f2de 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1054,6 +1054,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, mask |= EPOLLRDHUP; } + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; + if (sock->type == SOCK_DGRAM) { /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for From 9c2a2a45136de428b73907195a4a99eb78dc3aca Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 18 Nov 2024 22:03:42 +0100 Subject: [PATCH 061/366] selftest/bpf: Add test for af_vsock poll() Verify that vsock's poll() notices when sk_psock::ingress_msg isn't empty. Signed-off-by: Michal Luczaj Acked-by: Stefano Garzarella Link: https://lore.kernel.org/r/20241118-vsock-bpf-poll-close-v1-2-f1b9669cacdc@rbox.co Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend --- .../selftests/bpf/prog_tests/sockmap_basic.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index a2041f8e32eb9..6a9aab2d051ac 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -937,6 +937,50 @@ static void test_sockmap_same_sock(void) test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_skb_verdict_vsock_poll(void) +{ + struct test_sockmap_pass_prog *skel; + int err, map, conn, peer; + struct bpf_program *prog; + struct bpf_link *link; + char buf = 'x'; + int zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer)) + goto destroy; + + prog = skel->progs.prog_skb_verdict; + map = bpf_map__fd(skel->maps.sock_map_rx); + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto close; + + err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto detach; + + if (xsend(peer, &buf, 1, 0) != 1) + goto detach; + + err = poll_read(conn, IO_TIMEOUT_SEC); + if (!ASSERT_OK(err, "poll")) + goto detach; + + if (xrecv_nonblock(conn, &buf, 1, 0) != 1) + FAIL("xrecv_nonblock"); +detach: + bpf_link__detach(link); +close: + xclose(conn); + xclose(peer); +destroy: + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -997,4 +1041,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link")) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap skb_verdict vsock poll")) + test_sockmap_skb_verdict_vsock_poll(); } From 135ffc7becc82cfb84936ae133da7969220b43b2 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 18 Nov 2024 22:03:43 +0100 Subject: [PATCH 062/366] bpf, vsock: Invoke proto::close on close() vsock defines a BPF callback to be invoked when close() is called. However, this callback is never actually executed. As a result, a closed vsock socket is not automatically removed from the sockmap/sockhash. Introduce a dummy vsock_close() and make vsock_release() call proto::close. Note: changes in __vsock_release() look messy, but it's only due to indent level reduction and variables xmas tree reorder. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Link: https://lore.kernel.org/r/20241118-vsock-bpf-poll-close-v1-3-f1b9669cacdc@rbox.co Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend --- net/vmw_vsock/af_vsock.c | 67 ++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 725da7203f2de..5cf8109f672a5 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -117,12 +117,14 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +static void vsock_close(struct sock *sk, long timeout); /* Protocol family. */ struct proto vsock_proto = { .name = "AF_VSOCK", .owner = THIS_MODULE, .obj_size = sizeof(struct vsock_sock), + .close = vsock_close, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = vsock_bpf_update_proto, #endif @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type) static void __vsock_release(struct sock *sk, int level) { - if (sk) { - struct sock *pending; - struct vsock_sock *vsk; - - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. */ + struct vsock_sock *vsk; + struct sock *pending; - /* When "level" is SINGLE_DEPTH_NESTING, use the nested - * version to avoid the warning "possible recursive locking - * detected". When "level" is 0, lock_sock_nested(sk, level) - * is the same as lock_sock(sk). - */ - lock_sock_nested(sk, level); + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ - if (vsk->transport) - vsk->transport->release(vsk); - else if (sock_type_connectible(sk->sk_type)) - vsock_remove_sock(vsk); + /* When "level" is SINGLE_DEPTH_NESTING, use the nested + * version to avoid the warning "possible recursive locking + * detected". When "level" is 0, lock_sock_nested(sk, level) + * is the same as lock_sock(sk). + */ + lock_sock_nested(sk, level); - sock_orphan(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + if (vsk->transport) + vsk->transport->release(vsk); + else if (sock_type_connectible(sk->sk_type)) + vsock_remove_sock(vsk); - skb_queue_purge(&sk->sk_receive_queue); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; - /* Clean up any sockets that never were accepted. */ - while ((pending = vsock_dequeue_accept(sk)) != NULL) { - __vsock_release(pending, SINGLE_DEPTH_NESTING); - sock_put(pending); - } + skb_queue_purge(&sk->sk_receive_queue); - release_sock(sk); - sock_put(sk); + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending, SINGLE_DEPTH_NESTING); + sock_put(pending); } + + release_sock(sk); + sock_put(sk); } static void vsock_sk_destruct(struct sock *sk) @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk) } EXPORT_SYMBOL_GPL(vsock_data_ready); +/* Dummy callback required by sockmap. + * See unconditional call of saved_close() in sock_map_close(). + */ +static void vsock_close(struct sock *sk, long timeout) +{ +} + static int vsock_release(struct socket *sock) { - __vsock_release(sock->sk, 0); + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + sk->sk_prot->close(sk, 0); + __vsock_release(sk, 0); sock->sk = NULL; sock->state = SS_FREE; From 515745445e92500e8916ffe29cc4893ad97517b8 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 18 Nov 2024 22:03:44 +0100 Subject: [PATCH 063/366] selftest/bpf: Add test for vsock removal from sockmap on close() Make sure the proto::close callback gets invoked on vsock release. Signed-off-by: Michal Luczaj Acked-by: Stefano Garzarella Link: https://lore.kernel.org/r/20241118-vsock-bpf-poll-close-v1-4-f1b9669cacdc@rbox.co Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend --- .../selftests/bpf/prog_tests/sockmap_basic.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 6a9aab2d051ac..fdff0652d7ef5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -108,6 +108,35 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) close(s); } +static void test_sockmap_vsock_delete_on_close(void) +{ + int err, c, p, map; + const int zero = 0; + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) + return; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_GE(map, 0, "bpf_map_create")) { + close(c); + goto out; + } + + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); + close(c); + if (!ASSERT_OK(err, "bpf_map_update")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + ASSERT_OK(err, "after close(), bpf_map_update"); + +out: + close(p); + close(map); +} + static void test_skmsg_helpers(enum bpf_map_type map_type) { struct test_skmsg_load_helpers *skel; @@ -987,6 +1016,8 @@ void test_sockmap_basic(void) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap vsock delete on close")) + test_sockmap_vsock_delete_on_close(); if (test__start_subtest("sockmap sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) From 32cd3db7de97c0c7a018756ce66244342fd583f0 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 22 Nov 2024 13:10:29 +0100 Subject: [PATCH 064/366] xsk: fix OOB map writes when deleting elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jordy says: " In the xsk_map_delete_elem function an unsigned integer (map->max_entries) is compared with a user-controlled signed integer (k). Due to implicit type conversion, a large unsigned value for map->max_entries can bypass the intended bounds check: if (k >= map->max_entries) return -EINVAL; This allows k to hold a negative value (between -2147483648 and -2), which is then used as an array index in m->xsk_map[k], which results in an out-of-bounds access. spin_lock_bh(&m->lock); map_entry = &m->xsk_map[k]; // Out-of-bounds map_entry old_xs = unrcu_pointer(xchg(map_entry, NULL)); // Oob write if (old_xs) xsk_map_sock_delete(old_xs, map_entry); spin_unlock_bh(&m->lock); The xchg operation can then be used to cause an out-of-bounds write. Moreover, the invalid map_entry passed to xsk_map_sock_delete can lead to further memory corruption. " It indeed results in following splat: [76612.897343] BUG: unable to handle page fault for address: ffffc8fc2e461108 [76612.904330] #PF: supervisor write access in kernel mode [76612.909639] #PF: error_code(0x0002) - not-present page [76612.914855] PGD 0 P4D 0 [76612.917431] Oops: Oops: 0002 [#1] PREEMPT SMP [76612.921859] CPU: 11 UID: 0 PID: 10318 Comm: a.out Not tainted 6.12.0-rc1+ #470 [76612.929189] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [76612.939781] RIP: 0010:xsk_map_delete_elem+0x2d/0x60 [76612.944738] Code: 00 00 41 54 55 53 48 63 2e 3b 6f 24 73 38 4c 8d a7 f8 00 00 00 48 89 fb 4c 89 e7 e8 2d bf 05 00 48 8d b4 eb 00 01 00 00 31 ff <48> 87 3e 48 85 ff 74 05 e8 16 ff ff ff 4c 89 e7 e8 3e bc 05 00 31 [76612.963774] RSP: 0018:ffffc9002e407df8 EFLAGS: 00010246 [76612.969079] RAX: 0000000000000000 RBX: ffffc9002e461000 RCX: 0000000000000000 [76612.976323] RDX: 0000000000000001 RSI: ffffc8fc2e461108 RDI: 0000000000000000 [76612.983569] RBP: ffffffff80000001 R08: 0000000000000000 R09: 0000000000000007 [76612.990812] R10: ffffc9002e407e18 R11: ffff888108a38858 R12: ffffc9002e4610f8 [76612.998060] R13: ffff888108a38858 R14: 00007ffd1ae0ac78 R15: ffffc9002e4610c0 [76613.005303] FS: 00007f80b6f59740(0000) GS:ffff8897e0ec0000(0000) knlGS:0000000000000000 [76613.013517] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [76613.019349] CR2: ffffc8fc2e461108 CR3: 000000011e3ef001 CR4: 00000000007726f0 [76613.026595] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [76613.033841] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [76613.041086] PKRU: 55555554 [76613.043842] Call Trace: [76613.046331] [76613.048468] ? __die+0x20/0x60 [76613.051581] ? page_fault_oops+0x15a/0x450 [76613.055747] ? search_extable+0x22/0x30 [76613.059649] ? search_bpf_extables+0x5f/0x80 [76613.063988] ? exc_page_fault+0xa9/0x140 [76613.067975] ? asm_exc_page_fault+0x22/0x30 [76613.072229] ? xsk_map_delete_elem+0x2d/0x60 [76613.076573] ? xsk_map_delete_elem+0x23/0x60 [76613.080914] __sys_bpf+0x19b7/0x23c0 [76613.084555] __x64_sys_bpf+0x1a/0x20 [76613.088194] do_syscall_64+0x37/0xb0 [76613.091832] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [76613.096962] RIP: 0033:0x7f80b6d1e88d [76613.100592] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48 [76613.119631] RSP: 002b:00007ffd1ae0ac68 EFLAGS: 00000206 ORIG_RAX: 0000000000000141 [76613.131330] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f80b6d1e88d [76613.142632] RDX: 0000000000000098 RSI: 00007ffd1ae0ad20 RDI: 0000000000000003 [76613.153967] RBP: 00007ffd1ae0adc0 R08: 0000000000000000 R09: 0000000000000000 [76613.166030] R10: 00007f80b6f77040 R11: 0000000000000206 R12: 00007ffd1ae0aed8 [76613.177130] R13: 000055ddf42ce1e9 R14: 000055ddf42d0d98 R15: 00007f80b6fab040 [76613.188129] Fix this by simply changing key type from int to u32. Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP") CC: stable@vger.kernel.org Reported-by: Jordy Zomer Suggested-by: Jordy Zomer Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20241122121030.716788-2-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- net/xdp/xskmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index e1c526f97ce31..afa457506274c 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct bpf_map *map, void *key) struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; struct xdp_sock *old_xs; - int k = *(u32 *)key; + u32 k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; From ab244dd7cf4c291f82faacdc50b45cc0f55b674d Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 22 Nov 2024 13:10:30 +0100 Subject: [PATCH 065/366] bpf: fix OOB devmap writes when deleting elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jordy reported issue against XSKMAP which also applies to DEVMAP - the index used for accessing map entry, due to being a signed integer, causes the OOB writes. Fix is simple as changing the type from int to u32, however, when compared to XSKMAP case, one more thing needs to be addressed. When map is released from system via dev_map_free(), we iterate through all of the entries and an iterator variable is also an int, which implies OOB accesses. Again, change it to be u32. Example splat below: [ 160.724676] BUG: unable to handle page fault for address: ffffc8fc2c001000 [ 160.731662] #PF: supervisor read access in kernel mode [ 160.736876] #PF: error_code(0x0000) - not-present page [ 160.742095] PGD 0 P4D 0 [ 160.744678] Oops: Oops: 0000 [#1] PREEMPT SMP [ 160.749106] CPU: 1 UID: 0 PID: 520 Comm: kworker/u145:12 Not tainted 6.12.0-rc1+ #487 [ 160.757050] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 160.767642] Workqueue: events_unbound bpf_map_free_deferred [ 160.773308] RIP: 0010:dev_map_free+0x77/0x170 [ 160.777735] Code: 00 e8 fd 91 ed ff e8 b8 73 ed ff 41 83 7d 18 19 74 6e 41 8b 45 24 49 8b bd f8 00 00 00 31 db 85 c0 74 48 48 63 c3 48 8d 04 c7 <48> 8b 28 48 85 ed 74 30 48 8b 7d 18 48 85 ff 74 05 e8 b3 52 fa ff [ 160.796777] RSP: 0018:ffffc9000ee1fe38 EFLAGS: 00010202 [ 160.802086] RAX: ffffc8fc2c001000 RBX: 0000000080000000 RCX: 0000000000000024 [ 160.809331] RDX: 0000000000000000 RSI: 0000000000000024 RDI: ffffc9002c001000 [ 160.816576] RBP: 0000000000000000 R08: 0000000000000023 R09: 0000000000000001 [ 160.823823] R10: 0000000000000001 R11: 00000000000ee6b2 R12: dead000000000122 [ 160.831066] R13: ffff88810c928e00 R14: ffff8881002df405 R15: 0000000000000000 [ 160.838310] FS: 0000000000000000(0000) GS:ffff8897e0c40000(0000) knlGS:0000000000000000 [ 160.846528] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 160.852357] CR2: ffffc8fc2c001000 CR3: 0000000005c32006 CR4: 00000000007726f0 [ 160.859604] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 160.866847] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 160.874092] PKRU: 55555554 [ 160.876847] Call Trace: [ 160.879338] [ 160.881477] ? __die+0x20/0x60 [ 160.884586] ? page_fault_oops+0x15a/0x450 [ 160.888746] ? search_extable+0x22/0x30 [ 160.892647] ? search_bpf_extables+0x5f/0x80 [ 160.896988] ? exc_page_fault+0xa9/0x140 [ 160.900973] ? asm_exc_page_fault+0x22/0x30 [ 160.905232] ? dev_map_free+0x77/0x170 [ 160.909043] ? dev_map_free+0x58/0x170 [ 160.912857] bpf_map_free_deferred+0x51/0x90 [ 160.917196] process_one_work+0x142/0x370 [ 160.921272] worker_thread+0x29e/0x3b0 [ 160.925082] ? rescuer_thread+0x4b0/0x4b0 [ 160.929157] kthread+0xd4/0x110 [ 160.932355] ? kthread_park+0x80/0x80 [ 160.936079] ret_from_fork+0x2d/0x50 [ 160.943396] ? kthread_park+0x80/0x80 [ 160.950803] ret_from_fork_asm+0x11/0x20 [ 160.958482] Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references") CC: stable@vger.kernel.org Reported-by: Jordy Zomer Suggested-by: Jordy Zomer Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20241122121030.716788-3-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7878be18e9d26..3aa002a47a966 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -184,7 +184,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int i; + u32 i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were @@ -821,7 +821,7 @@ static long dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; @@ -838,7 +838,7 @@ static long dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; unsigned long flags; int ret = -ENOENT; From ac9a48a6f1610b094072b815e884e1668aea4401 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 22 Nov 2024 12:29:09 +0100 Subject: [PATCH 066/366] xsk: always clear DMA mapping information when unmapping the pool When the umem is shared, the DMA mapping is also shared between the xsk pools, therefore it should stay valid as long as at least 1 user remains. However, the pool also keeps the copies of DMA-related information that are initialized in the same way in xp_init_dma_info(), but cleared by xp_dma_unmap() only for the last remaining pool, this causes the problems below. The first one is that the commit adbf5a42341f ("ice: remove af_xdp_zc_qps bitmap") relies on pool->dev to determine the presence of a ZC pool on a given queue, avoiding internal bookkeeping. This works perfectly fine if the UMEM is not shared, but reliably fails otherwise as stated in the linked report. The second one is pool->dma_pages which is dynamically allocated and only freed in xp_dma_unmap(), this leads to a small memory leak. kmemleak does not catch it, but by printing the allocation results after terminating the userspace program it is possible to see that all addresses except the one belonging to the last detached pool are still accessible through the kmemleak dump functionality. Always clear the DMA mapping information from the pool and free pool->dma_pages when unmapping the pool, so that the only difference between results of the last remaining user's call and the ones before would be the destruction of the DMA mapping. Fixes: adbf5a42341f ("ice: remove af_xdp_zc_qps bitmap") Fixes: 921b68692abb ("xsk: Enable sharing of dma mappings") Reported-by: Alasdair McWilliam Closes: https://lore.kernel.org/PA4P194MB10056F208AF221D043F57A3D86512@PA4P194MB1005.EURP194.PROD.OUTLOOK.COM Acked-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20241122112912.89881-1-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- net/xdp/xsk_buff_pool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index ae71da7d2cd6a..1f7975b496579 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -387,10 +387,9 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) return; } - if (!refcount_dec_and_test(&dma_map->users)) - return; + if (refcount_dec_and_test(&dma_map->users)) + __xp_dma_unmap(dma_map, attrs); - __xp_dma_unmap(dma_map, attrs); kvfree(pool->dma_pages); pool->dma_pages = NULL; pool->dma_pages_cnt = 0; From ef3ba8c258ee368a5343fa9329df85b4bcb9e8b5 Mon Sep 17 00:00:00 2001 From: Amir Mohammadi Date: Thu, 21 Nov 2024 12:04:13 +0330 Subject: [PATCH 067/366] bpftool: fix potential NULL pointer dereferencing in prog_dump() A NULL pointer dereference could occur if ksyms is not properly checked before usage in the prog_dump() function. Fixes: b053b439b72a ("bpf: libbpf: bpftool: Print bpf_line_info during prog dump") Signed-off-by: Amir Mohammadi Reviewed-by: Quentin Monnet Acked-by: John Fastabend Link: https://lore.kernel.org/r/20241121083413.7214-1-amiremohamadi@yahoo.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ff949ea82fa6..e71be67f1d865 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -822,11 +822,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - if (disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum)) - goto exit_free; + if (ksyms) { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; + } else { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + NULL, 0, 0, false)) + goto exit_free; + } img += lens[i]; From ee1dfbdd8b4b6de85e96ae2059dc9c1bdb6b49b5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 21 Nov 2024 11:08:25 +0100 Subject: [PATCH 068/366] can: dev: can_set_termination(): allow sleeping GPIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 6e86a1543c37 ("can: dev: provide optional GPIO based termination support") GPIO based termination support was added. For no particular reason that patch uses gpiod_set_value() to set the GPIO. This leads to the following warning, if the systems uses a sleeping GPIO, i.e. behind an I2C port expander: | WARNING: CPU: 0 PID: 379 at /drivers/gpio/gpiolib.c:3496 gpiod_set_value+0x50/0x6c | CPU: 0 UID: 0 PID: 379 Comm: ip Not tainted 6.11.0-20241016-1 #1 823affae360cc91126e4d316d7a614a8bf86236c Replace gpiod_set_value() by gpiod_set_value_cansleep() to allow the use of sleeping GPIOs. Cc: Nicolai Buchwitz Cc: Lino Sanfilippo Cc: stable@vger.kernel.org Reported-by: Leonard Göhrs Tested-by: Leonard Göhrs Fixes: 6e86a1543c37 ("can: dev: provide optional GPIO based termination support") Link: https://patch.msgid.link/20241121-dev-fix-can_set_termination-v1-1-41fa6e29216d@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 6792c14fd7eb0..681643ab37804 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -468,7 +468,7 @@ static int can_set_termination(struct net_device *ndev, u16 term) else set = 0; - gpiod_set_value(priv->termination_gpio, set); + gpiod_set_value_cansleep(priv->termination_gpio, set); return 0; } From 889b2ae9139a87b3390f7003cb1bb3d65bf90a26 Mon Sep 17 00:00:00 2001 From: Alexander Kozhinov Date: Fri, 18 Oct 2024 23:24:26 +0200 Subject: [PATCH 069/366] can: gs_usb: add usb endpoint address detection at driver probe step There is an approach made to implement gs_usb firmware/driver based on Zephyr RTOS. It was found that USB stack of Zephyr RTOS overwrites USB EP addresses, if they have different last 4 bytes in absence of other endpoints. For example in case of gs_usb candlelight firmware EP-IN is 0x81 and EP-OUT 0x02. If there are no additional USB endpoints, Zephyr RTOS will overwrite EP-OUT to 0x01. More information can be found in the discussion with Zephyr RTOS USB stack maintainer here: https://github.com/zephyrproject-rtos/zephyr/issues/67812 There are already two different gs_usb FW driver implementations based on Zephyr RTOS: 1. https://github.com/CANnectivity/cannectivity (by: https://github.com/henrikbrixandersen) 2. https://github.com/zephyrproject-rtos/zephyr/compare/main...KozhinovAlexander:zephyr:gs_usb (by: https://github.com/KozhinovAlexander) At the moment both Zephyr RTOS implementations use dummy USB endpoint, to overcome described USB stack behavior from Zephyr itself. Since Zephyr RTOS is intended to be used on microcontrollers with very constrained amount of resources (ROM, RAM) and additional endpoint requires memory, it is more convenient to update the gs_usb driver in the Linux kernel. To fix this problem, update the gs_usb driver from using hard coded endpoint numbers to evaluate the endpoint descriptors and use the endpoints provided there. Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Reviewed-by: Vincent Mailhol Signed-off-by: Alexander Kozhinov Link: https://patch.msgid.link/20241018212450.31746-1-ak.alexander.kozhinov@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index bc86e9b329fd1..b6f4de375df75 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -43,9 +43,6 @@ #define USB_XYLANTA_SAINT3_VENDOR_ID 0x16d0 #define USB_XYLANTA_SAINT3_PRODUCT_ID 0x0f30 -#define GS_USB_ENDPOINT_IN 1 -#define GS_USB_ENDPOINT_OUT 2 - /* Timestamp 32 bit timer runs at 1 MHz (1 µs tick). Worker accounts * for timer overflow (will be after ~71 minutes) */ @@ -336,6 +333,9 @@ struct gs_usb { unsigned int hf_size_rx; u8 active_channels; + + unsigned int pipe_in; + unsigned int pipe_out; }; /* 'allocate' a tx context. @@ -687,7 +687,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, parent->udev, - usb_rcvbulkpipe(parent->udev, GS_USB_ENDPOINT_IN), + parent->pipe_in, hf, dev->parent->hf_size_rx, gs_usb_receive_bulk_callback, parent); @@ -819,7 +819,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, } usb_fill_bulk_urb(urb, dev->udev, - usb_sndbulkpipe(dev->udev, GS_USB_ENDPOINT_OUT), + dev->parent->pipe_out, hf, dev->hf_size_tx, gs_usb_xmit_callback, txc); @@ -925,8 +925,7 @@ static int gs_can_open(struct net_device *netdev) /* fill, anchor, and submit rx urb */ usb_fill_bulk_urb(urb, dev->udev, - usb_rcvbulkpipe(dev->udev, - GS_USB_ENDPOINT_IN), + dev->parent->pipe_in, buf, dev->parent->hf_size_rx, gs_usb_receive_bulk_callback, parent); @@ -1413,6 +1412,7 @@ static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *ep_in, *ep_out; struct gs_host_frame *hf; struct gs_usb *parent; struct gs_host_config hconf = { @@ -1422,6 +1422,13 @@ static int gs_usb_probe(struct usb_interface *intf, unsigned int icount, i; int rc; + rc = usb_find_common_endpoints(intf->cur_altsetting, + &ep_in, &ep_out, NULL, NULL); + if (rc) { + dev_err(&intf->dev, "Required endpoints not found\n"); + return rc; + } + /* send host config */ rc = usb_control_msg_send(udev, 0, GS_USB_BREQ_HOST_FORMAT, @@ -1466,6 +1473,10 @@ static int gs_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, parent); parent->udev = udev; + /* store the detected endpoints */ + parent->pipe_in = usb_rcvbulkpipe(parent->udev, ep_in->bEndpointAddress); + parent->pipe_out = usb_sndbulkpipe(parent->udev, ep_out->bEndpointAddress); + for (i = 0; i < icount; i++) { unsigned int hf_size_rx = 0; From 9e66242504f49e17481d8e197730faba7d99c934 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:42 +0100 Subject: [PATCH 070/366] can: c_can: c_can_handle_bus_err(): update statistics if skb allocation fails Ensure that the statistics are always updated, even if the skb allocation fails. Fixes: 4d6d26537940 ("can: c_can: fix {rx,tx}_errors statistics") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-2-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can_main.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index 511615dc33419..cc371d0c9f3c7 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -1014,49 +1014,57 @@ static int c_can_handle_bus_err(struct net_device *dev, /* propagate the error condition to the CAN stack */ skb = alloc_can_err_skb(dev, &cf); - if (unlikely(!skb)) - return 0; /* check for 'last error code' which tells us the * type of the last error to occur on the CAN bus */ - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + if (likely(skb)) + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; switch (lec_type) { case LEC_STUFF_ERROR: netdev_dbg(dev, "stuff error\n"); - cf->data[2] |= CAN_ERR_PROT_STUFF; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_STUFF; stats->rx_errors++; break; case LEC_FORM_ERROR: netdev_dbg(dev, "form error\n"); - cf->data[2] |= CAN_ERR_PROT_FORM; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_FORM; stats->rx_errors++; break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] = CAN_ERR_PROT_LOC_ACK; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_ACK; stats->tx_errors++; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); - cf->data[2] |= CAN_ERR_PROT_BIT1; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT1; stats->tx_errors++; break; case LEC_BIT0_ERROR: netdev_dbg(dev, "bit0 error\n"); - cf->data[2] |= CAN_ERR_PROT_BIT0; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT0; stats->tx_errors++; break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; stats->rx_errors++; break; default: break; } + if (unlikely(!skb)) + return 0; + netif_receive_skb(skb); return 1; } From ee6bf3677ae03569d833795064e17f605c2163c7 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:43 +0100 Subject: [PATCH 071/366] can: sun4i_can: sun4i_can_err(): call can_change_state() even if cf is NULL Call the function can_change_state() if the allocation of the skb fails, as it handles the cf parameter when it is null. Additionally, this ensures that the statistics related to state error counters (i. e. warning, passive, and bus-off) are updated. Fixes: 0738eff14d81 ("can: Allwinner A10/A20 CAN Controller support - Kernel module") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-3-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 360158c295d34..17f94cca93fbc 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -629,10 +629,10 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) tx_state = txerr >= rxerr ? state : 0; rx_state = txerr <= rxerr ? state : 0; - if (likely(skb)) - can_change_state(dev, cf, tx_state, rx_state); - else - priv->can.state = state; + /* The skb allocation might fail, but can_change_state() + * handles cf == NULL. + */ + can_change_state(dev, cf, tx_state, rx_state); if (state == CAN_STATE_BUS_OFF) can_bus_off(dev); } From 9ad86d377ef4a19c75a9c639964879a5b25a433b Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:45 +0100 Subject: [PATCH 072/366] can: hi311x: hi3110_can_ist(): fix potential use-after-free The commit a22bd630cfff ("can: hi311x: do not report txerr and rxerr during bus-off") removed the reporting of rxerr and txerr even in case of correct operation (i. e. not bus-off). The error count information added to the CAN frame after netif_rx() is a potential use after free, since there is no guarantee that the skb is in the same state. It might be freed or reused. Fix the issue by postponing the netif_rx() call in case of txerr and rxerr reporting. Fixes: a22bd630cfff ("can: hi311x: do not report txerr and rxerr during bus-off") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-5-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 148d974ebb210..b67464df25ffe 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -671,9 +671,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) tx_state = txerr >= rxerr ? new_state : 0; rx_state = txerr <= rxerr ? new_state : 0; can_change_state(net, cf, tx_state, rx_state); - netif_rx(skb); if (new_state == CAN_STATE_BUS_OFF) { + netif_rx(skb); can_bus_off(net); if (priv->can.restart_ms == 0) { priv->force_quit = 1; @@ -684,6 +684,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) cf->can_id |= CAN_ERR_CNT; cf->data[6] = txerr; cf->data[7] = rxerr; + netif_rx(skb); } } From ef5034aed9e03c649386f50e67a0867062d00d7f Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:46 +0100 Subject: [PATCH 073/366] can: hi311x: hi3110_can_ist(): update state error statistics if skb allocation fails Ensure that the statistics related to state error counters (i. e. warning, passive, and bus-off) are updated even in case the skb allocation fails. Additionally, also handle bus-off state is now. Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-6-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index b67464df25ffe..25d9b32f57011 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -663,8 +663,6 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) u8 rxerr, txerr; skb = alloc_can_err_skb(net, &cf); - if (!skb) - break; txerr = hi3110_read(spi, HI3110_READ_TEC); rxerr = hi3110_read(spi, HI3110_READ_REC); @@ -673,14 +671,15 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) can_change_state(net, cf, tx_state, rx_state); if (new_state == CAN_STATE_BUS_OFF) { - netif_rx(skb); + if (skb) + netif_rx(skb); can_bus_off(net); if (priv->can.restart_ms == 0) { priv->force_quit = 1; hi3110_hw_sleep(spi); break; } - } else { + } else if (skb) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = txerr; cf->data[7] = rxerr; From 988d4222bf9039a875a3d48f2fe35c317831ff68 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:47 +0100 Subject: [PATCH 074/366] can: m_can: m_can_handle_lec_err(): fix {rx,tx}_errors statistics The m_can_handle_lec_err() function was incorrectly incrementing only the receive error counter, even in cases of bit or acknowledgment errors that occur during transmission. Fix the issue by incrementing the appropriate counter based on the type of error. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-7-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 16e9e7d7527d9..533bcb77c9f93 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -695,47 +695,60 @@ static int m_can_handle_lec_err(struct net_device *dev, u32 timestamp = 0; cdev->can.can_stats.bus_error++; - stats->rx_errors++; /* propagate the error condition to the CAN stack */ skb = alloc_can_err_skb(dev, &cf); - if (unlikely(!skb)) - return 0; /* check for 'last error code' which tells us the * type of the last error to occur on the CAN bus */ - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + if (likely(skb)) + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; switch (lec_type) { case LEC_STUFF_ERROR: netdev_dbg(dev, "stuff error\n"); - cf->data[2] |= CAN_ERR_PROT_STUFF; + stats->rx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_STUFF; break; case LEC_FORM_ERROR: netdev_dbg(dev, "form error\n"); - cf->data[2] |= CAN_ERR_PROT_FORM; + stats->rx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_FORM; break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] = CAN_ERR_PROT_LOC_ACK; + stats->tx_errors++; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_ACK; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); - cf->data[2] |= CAN_ERR_PROT_BIT1; + stats->tx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT1; break; case LEC_BIT0_ERROR: netdev_dbg(dev, "bit0 error\n"); - cf->data[2] |= CAN_ERR_PROT_BIT0; + stats->tx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT0; break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + stats->rx_errors++; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; default: break; } + if (unlikely(!skb)) + return 0; + if (cdev->is_peripheral) timestamp = m_can_get_timestamp(cdev); From bb03d568bb21b4afe7935d1943bcf68ddea3ea45 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:48 +0100 Subject: [PATCH 075/366] can: ifi_canfd: ifi_canfd_handle_lec_err(): fix {rx,tx}_errors statistics The ifi_canfd_handle_lec_err() function was incorrectly incrementing only the receive error counter, even in cases of bit or acknowledgment errors that occur during transmission. Fix the issue by incrementing the appropriate counter based on the type of error. Fixes: 5bbd655a8bd0 ("can: ifi: Add more detailed error reporting") Signed-off-by: Dario Binacchi Reviewed-by: Marek Vasut Link: https://patch.msgid.link/20241122221650.633981-8-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 58 ++++++++++++++++++--------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index d32b10900d2f6..c86b57d47085f 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -390,36 +390,55 @@ static int ifi_canfd_handle_lec_err(struct net_device *ndev) return 0; priv->can.can_stats.bus_error++; - stats->rx_errors++; /* Propagate the error condition to the CAN stack. */ skb = alloc_can_err_skb(ndev, &cf); - if (unlikely(!skb)) - return 0; /* Read the error counter register and check for new errors. */ - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + if (likely(skb)) + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - if (errctr & IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST) - cf->data[2] |= CAN_ERR_PROT_OVERLOAD; + if (errctr & IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST) { + stats->rx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_OVERLOAD; + } - if (errctr & IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST) - cf->data[3] = CAN_ERR_PROT_LOC_ACK; + if (errctr & IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST) { + stats->tx_errors++; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_ACK; + } - if (errctr & IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST) - cf->data[2] |= CAN_ERR_PROT_BIT0; + if (errctr & IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST) { + stats->tx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT0; + } - if (errctr & IFI_CANFD_ERROR_CTR_BIT1_ERROR_FIRST) - cf->data[2] |= CAN_ERR_PROT_BIT1; + if (errctr & IFI_CANFD_ERROR_CTR_BIT1_ERROR_FIRST) { + stats->tx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_BIT1; + } - if (errctr & IFI_CANFD_ERROR_CTR_STUFF_ERROR_FIRST) - cf->data[2] |= CAN_ERR_PROT_STUFF; + if (errctr & IFI_CANFD_ERROR_CTR_STUFF_ERROR_FIRST) { + stats->rx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_STUFF; + } - if (errctr & IFI_CANFD_ERROR_CTR_CRC_ERROR_FIRST) - cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + if (errctr & IFI_CANFD_ERROR_CTR_CRC_ERROR_FIRST) { + stats->rx_errors++; + if (likely(skb)) + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + } - if (errctr & IFI_CANFD_ERROR_CTR_FORM_ERROR_FIRST) - cf->data[2] |= CAN_ERR_PROT_FORM; + if (errctr & IFI_CANFD_ERROR_CTR_FORM_ERROR_FIRST) { + stats->rx_errors++; + if (likely(skb)) + cf->data[2] |= CAN_ERR_PROT_FORM; + } /* Reset the error counter, ack the IRQ and re-enable the counter. */ writel(IFI_CANFD_ERROR_CTR_ER_RESET, priv->base + IFI_CANFD_ERROR_CTR); @@ -427,6 +446,9 @@ static int ifi_canfd_handle_lec_err(struct net_device *ndev) priv->base + IFI_CANFD_INTERRUPT); writel(IFI_CANFD_ERROR_CTR_ER_ENABLE, priv->base + IFI_CANFD_ERROR_CTR); + if (unlikely(!skb)) + return 0; + netif_receive_skb(skb); return 1; From 3e4645931655776e757f9fb5ae29371cd7cb21a2 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:49 +0100 Subject: [PATCH 076/366] can: hi311x: hi3110_can_ist(): fix {rx,tx}_errors statistics The hi3110_can_ist() function was incorrectly incrementing only the receive error counter, even in cases of bit or acknowledgment errors that occur during transmission. The fix the issue by incrementing the appropriate counter based on the type of error. Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-9-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 47 ++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 25d9b32f57011..09ae218315d73 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -696,27 +696,38 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) /* Check for protocol errors */ if (eflag & HI3110_ERR_PROTOCOL_MASK) { skb = alloc_can_err_skb(net, &cf); - if (!skb) - break; + if (skb) + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; priv->can.can_stats.bus_error++; - priv->net->stats.rx_errors++; - if (eflag & HI3110_ERR_BITERR) - cf->data[2] |= CAN_ERR_PROT_BIT; - else if (eflag & HI3110_ERR_FRMERR) - cf->data[2] |= CAN_ERR_PROT_FORM; - else if (eflag & HI3110_ERR_STUFERR) - cf->data[2] |= CAN_ERR_PROT_STUFF; - else if (eflag & HI3110_ERR_CRCERR) - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; - else if (eflag & HI3110_ERR_ACKERR) - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; - - cf->data[6] = hi3110_read(spi, HI3110_READ_TEC); - cf->data[7] = hi3110_read(spi, HI3110_READ_REC); + if (eflag & HI3110_ERR_BITERR) { + priv->net->stats.tx_errors++; + if (skb) + cf->data[2] |= CAN_ERR_PROT_BIT; + } else if (eflag & HI3110_ERR_FRMERR) { + priv->net->stats.rx_errors++; + if (skb) + cf->data[2] |= CAN_ERR_PROT_FORM; + } else if (eflag & HI3110_ERR_STUFERR) { + priv->net->stats.rx_errors++; + if (skb) + cf->data[2] |= CAN_ERR_PROT_STUFF; + } else if (eflag & HI3110_ERR_CRCERR) { + priv->net->stats.rx_errors++; + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + } else if (eflag & HI3110_ERR_ACKERR) { + priv->net->stats.tx_errors++; + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + } + netdev_dbg(priv->net, "Bus Error\n"); - netif_rx(skb); + if (skb) { + cf->data[6] = hi3110_read(spi, HI3110_READ_TEC); + cf->data[7] = hi3110_read(spi, HI3110_READ_REC); + netif_rx(skb); + } } } From 2c4ef3af4b028a0eaaf378df511d3b425b1df61f Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:50 +0100 Subject: [PATCH 077/366] can: sja1000: sja1000_err(): fix {rx,tx}_errors statistics The sja1000_err() function only incremented the receive error counter and never the transmit error counter, even if the ECC_DIR flag reported that an error had occurred during transmission. Increment the receive/transmit error counter based on the value of the ECC_DIR flag. Fixes: 429da1cc841b ("can: Driver for the SJA1000 CAN controller") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-10-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 67 ++++++++++++++++++------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index ddb3247948ad2..4d245857ef1ce 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -416,8 +416,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) int ret = 0; skb = alloc_can_err_skb(dev, &cf); - if (skb == NULL) - return -ENOMEM; txerr = priv->read_reg(priv, SJA1000_TXERR); rxerr = priv->read_reg(priv, SJA1000_RXERR); @@ -425,8 +423,11 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) if (isrc & IRQ_DOI) { /* data overrun interrupt */ netdev_dbg(dev, "data overrun interrupt\n"); - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + } + stats->rx_over_errors++; stats->rx_errors++; sja1000_write_cmdreg(priv, CMD_CDO); /* clear bit */ @@ -452,7 +453,7 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) else state = CAN_STATE_ERROR_ACTIVE; } - if (state != CAN_STATE_BUS_OFF) { + if (state != CAN_STATE_BUS_OFF && skb) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = txerr; cf->data[7] = rxerr; @@ -460,33 +461,38 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) if (isrc & IRQ_BEI) { /* bus error interrupt */ priv->can.can_stats.bus_error++; - stats->rx_errors++; ecc = priv->read_reg(priv, SJA1000_ECC); + if (skb) { + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - - /* set error type */ - switch (ecc & ECC_MASK) { - case ECC_BIT: - cf->data[2] |= CAN_ERR_PROT_BIT; - break; - case ECC_FORM: - cf->data[2] |= CAN_ERR_PROT_FORM; - break; - case ECC_STUFF: - cf->data[2] |= CAN_ERR_PROT_STUFF; - break; - default: - break; - } + /* set error type */ + switch (ecc & ECC_MASK) { + case ECC_BIT: + cf->data[2] |= CAN_ERR_PROT_BIT; + break; + case ECC_FORM: + cf->data[2] |= CAN_ERR_PROT_FORM; + break; + case ECC_STUFF: + cf->data[2] |= CAN_ERR_PROT_STUFF; + break; + default: + break; + } - /* set error location */ - cf->data[3] = ecc & ECC_SEG; + /* set error location */ + cf->data[3] = ecc & ECC_SEG; + } /* Error occurred during transmission? */ - if ((ecc & ECC_DIR) == 0) - cf->data[2] |= CAN_ERR_PROT_TX; + if ((ecc & ECC_DIR) == 0) { + stats->tx_errors++; + if (skb) + cf->data[2] |= CAN_ERR_PROT_TX; + } else { + stats->rx_errors++; + } } if (isrc & IRQ_EPI) { /* error passive interrupt */ @@ -502,8 +508,10 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) netdev_dbg(dev, "arbitration lost interrupt\n"); alc = priv->read_reg(priv, SJA1000_ALC); priv->can.can_stats.arbitration_lost++; - cf->can_id |= CAN_ERR_LOSTARB; - cf->data[0] = alc & 0x1f; + if (skb) { + cf->can_id |= CAN_ERR_LOSTARB; + cf->data[0] = alc & 0x1f; + } } if (state != priv->can.state) { @@ -516,6 +524,9 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) can_bus_off(dev); } + if (!skb) + return -ENOMEM; + netif_rx(skb); return ret; From 595a81988a6fe06eb5849e972c8b9cb21c4e0d54 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:51 +0100 Subject: [PATCH 078/366] can: sun4i_can: sun4i_can_err(): fix {rx,tx}_errors statistics The sun4i_can_err() function only incremented the receive error counter and never the transmit error counter, even if the STA_ERR_DIR flag reported that an error had occurred during transmission. Increment the receive/transmit error counter based on the value of the STA_ERR_DIR flag. Fixes: 0738eff14d81 ("can: Allwinner A10/A20 CAN Controller support - Kernel module") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-11-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 17f94cca93fbc..4311c1f0eafd8 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -579,11 +579,9 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) /* bus error interrupt */ netdev_dbg(dev, "bus error interrupt\n"); priv->can.can_stats.bus_error++; - stats->rx_errors++; + ecc = readl(priv->base + SUN4I_REG_STA_ADDR); if (likely(skb)) { - ecc = readl(priv->base + SUN4I_REG_STA_ADDR); - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; switch (ecc & SUN4I_STA_MASK_ERR) { @@ -601,9 +599,15 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) >> 16; break; } - /* error occurred during transmission? */ - if ((ecc & SUN4I_STA_ERR_DIR) == 0) + } + + /* error occurred during transmission? */ + if ((ecc & SUN4I_STA_ERR_DIR) == 0) { + if (likely(skb)) cf->data[2] |= CAN_ERR_PROT_TX; + stats->tx_errors++; + } else { + stats->rx_errors++; } } if (isrc & SUN4I_INT_ERR_PASSIVE) { From 72a7e2e74b3075959f05e622bae09b115957dffe Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:52 +0100 Subject: [PATCH 079/366] can: ems_usb: ems_usb_rx_err(): fix {rx,tx}_errors statistics The ems_usb_rx_err() function only incremented the receive error counter and never the transmit error counter, even if the ECC_DIR flag reported that an error had occurred during transmission. Increment the receive/transmit error counter based on the value of the ECC_DIR flag. Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-12-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 58 ++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 050c0b49938a4..5355bac4dccbe 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -335,15 +335,14 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) struct net_device_stats *stats = &dev->netdev->stats; skb = alloc_can_err_skb(dev->netdev, &cf); - if (skb == NULL) - return; if (msg->type == CPC_MSG_TYPE_CAN_STATE) { u8 state = msg->msg.can_state; if (state & SJA1000_SR_BS) { dev->can.state = CAN_STATE_BUS_OFF; - cf->can_id |= CAN_ERR_BUSOFF; + if (skb) + cf->can_id |= CAN_ERR_BUSOFF; dev->can.can_stats.bus_off++; can_bus_off(dev->netdev); @@ -361,44 +360,53 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) /* bus error interrupt */ dev->can.can_stats.bus_error++; - stats->rx_errors++; - cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + if (skb) { + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - switch (ecc & SJA1000_ECC_MASK) { - case SJA1000_ECC_BIT: - cf->data[2] |= CAN_ERR_PROT_BIT; - break; - case SJA1000_ECC_FORM: - cf->data[2] |= CAN_ERR_PROT_FORM; - break; - case SJA1000_ECC_STUFF: - cf->data[2] |= CAN_ERR_PROT_STUFF; - break; - default: - cf->data[3] = ecc & SJA1000_ECC_SEG; - break; + switch (ecc & SJA1000_ECC_MASK) { + case SJA1000_ECC_BIT: + cf->data[2] |= CAN_ERR_PROT_BIT; + break; + case SJA1000_ECC_FORM: + cf->data[2] |= CAN_ERR_PROT_FORM; + break; + case SJA1000_ECC_STUFF: + cf->data[2] |= CAN_ERR_PROT_STUFF; + break; + default: + cf->data[3] = ecc & SJA1000_ECC_SEG; + break; + } } /* Error occurred during transmission? */ - if ((ecc & SJA1000_ECC_DIR) == 0) - cf->data[2] |= CAN_ERR_PROT_TX; + if ((ecc & SJA1000_ECC_DIR) == 0) { + stats->tx_errors++; + if (skb) + cf->data[2] |= CAN_ERR_PROT_TX; + } else { + stats->rx_errors++; + } - if (dev->can.state == CAN_STATE_ERROR_WARNING || - dev->can.state == CAN_STATE_ERROR_PASSIVE) { + if (skb && (dev->can.state == CAN_STATE_ERROR_WARNING || + dev->can.state == CAN_STATE_ERROR_PASSIVE)) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (txerr > rxerr) ? CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; } } else if (msg->type == CPC_MSG_TYPE_OVERRUN) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + } stats->rx_over_errors++; stats->rx_errors++; } - netif_rx(skb); + if (skb) + netif_rx(skb); } /* From d7b916540c2ba3d2a88c27b2a6287b39d8eac052 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:53 +0100 Subject: [PATCH 080/366] can: f81604: f81604_handle_can_bus_errors(): fix {rx,tx}_errors statistics The f81604_handle_can_bus_errors() function only incremented the receive error counter and never the transmit error counter, even if the ECC_DIR flag reported that an error had occurred during transmission. Increment the receive/transmit error counter based on the value of the ECC_DIR flag. Fixes: 88da17436973 ("can: usb: f81604: add Fintek F81604 support") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-13-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/f81604.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index bc0c8903fe779..e0cfa1460b0b8 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -526,7 +526,6 @@ static void f81604_handle_can_bus_errors(struct f81604_port_priv *priv, netdev_dbg(netdev, "bus error interrupt\n"); priv->can.can_stats.bus_error++; - stats->rx_errors++; if (skb) { cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; @@ -548,10 +547,15 @@ static void f81604_handle_can_bus_errors(struct f81604_port_priv *priv, /* set error location */ cf->data[3] = data->ecc & F81604_SJA1000_ECC_SEG; + } - /* Error occurred during transmission? */ - if ((data->ecc & F81604_SJA1000_ECC_DIR) == 0) + /* Error occurred during transmission? */ + if ((data->ecc & F81604_SJA1000_ECC_DIR) == 0) { + stats->tx_errors++; + if (skb) cf->data[2] |= CAN_ERR_PROT_TX; + } else { + stats->rx_errors++; } set_bit(F81604_CLEAR_ECC, &priv->clear_flags); From 30447a1bc0e066e492552b3e5ffeb63c1605dfe2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 24 Nov 2024 18:42:56 +0100 Subject: [PATCH 081/366] can: mcp251xfd: mcp251xfd_get_tef_len(): work around erratum DS80000789E 6. Commit b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") introduced mcp251xfd_get_tef_len() to get the number of unhandled transmit events from the Transmit Event FIFO (TEF). As the TEF has no head index, the driver uses the TX-FIFO's tail index instead, assuming that send frames are completed. When calculating the number of unhandled TEF events, that commit didn't take mcp2518fd erratum DS80000789E 6. into account. According to that erratum, the FIFOCI bits of a FIFOSTA register, here the TX-FIFO tail index might be corrupted. However here it seems the bit indicating that the TX-FIFO is empty (MCP251XFD_REG_FIFOSTA_TFERFFIF) is not correct while the TX-FIFO tail index is. Assume that the TX-FIFO is indeed empty if: - Chip's head and tail index are equal (len == 0). - The TX-FIFO is less than half full. (The TX-FIFO empty case has already been checked at the beginning of this function.) - No free buffers in the TX ring. If the TX-FIFO is assumed to be empty, assume that the TEF is full and return the number of elements in the TX-FIFO (which equals the number of TEF elements). If these assumptions are false, the driver might read to many objects from the TEF. mcp251xfd_handle_tefif_one() checks the sequence numbers and will refuse to process old events. Reported-by: Renjaya Raga Zenta Closes: https://patch.msgid.link/CAJ7t6HgaeQ3a_OtfszezU=zB-FqiZXqrnATJ3UujNoQJJf7GgA@mail.gmail.com Fixes: b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") Tested-by: Renjaya Raga Zenta Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20241126-mcp251xfd-fix-length-calculation-v2-1-c2ed516ed6ba@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c index d3ac865933fdf..e94321849fd7e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c @@ -21,6 +21,11 @@ static inline bool mcp251xfd_tx_fifo_sta_empty(u32 fifo_sta) return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF; } +static inline bool mcp251xfd_tx_fifo_sta_less_than_half_full(u32 fifo_sta) +{ + return fifo_sta & MCP251XFD_REG_FIFOSTA_TFHRFHIF; +} + static inline int mcp251xfd_tef_tail_get_from_chip(const struct mcp251xfd_priv *priv, u8 *tef_tail) @@ -147,7 +152,29 @@ mcp251xfd_get_tef_len(struct mcp251xfd_priv *priv, u8 *len_p) BUILD_BUG_ON(sizeof(tx_ring->obj_num) != sizeof(len)); len = (chip_tx_tail << shift) - (tail << shift); - *len_p = len >> shift; + len >>= shift; + + /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI + * bits of a FIFOSTA register, here the TX-FIFO tail index + * might be corrupted. + * + * However here it seems the bit indicating that the TX-FIFO + * is empty (MCP251XFD_REG_FIFOSTA_TFERFFIF) is not correct + * while the TX-FIFO tail index is. + * + * We assume the TX-FIFO is empty, i.e. all pending CAN frames + * haven been send, if: + * - Chip's head and tail index are equal (len == 0). + * - The TX-FIFO is less than half full. + * (The TX-FIFO empty case has already been checked at the + * beginning of this function.) + * - No free buffers in the TX ring. + */ + if (len == 0 && mcp251xfd_tx_fifo_sta_less_than_half_full(fifo_sta) && + mcp251xfd_get_tx_free(tx_ring) == 0) + len = tx_ring->obj_num; + + *len_p = len; return 0; } From bd2fccac61b40eaf08d9546acc9fef958bfe4763 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 25 Nov 2024 22:53:14 +0200 Subject: [PATCH 082/366] drm/dp_mst: Fix MST sideband message body length check Fix the MST sideband message body length check, which must be at least 1 byte accounting for the message body CRC (aka message data CRC) at the end of the message. This fixes a case where an MST branch device returns a header with a correct header CRC (indicating a correctly received body length), with the body length being incorrectly set to 0. This will later lead to a memory corruption in drm_dp_sideband_append_payload() and the following errors in dmesg: UBSAN: array-index-out-of-bounds in drivers/gpu/drm/display/drm_dp_mst_topology.c:786:25 index -1 is out of range for type 'u8 [48]' Call Trace: drm_dp_sideband_append_payload+0x33d/0x350 [drm_display_helper] drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper] drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper] memcpy: detected field-spanning write (size 18446744073709551615) of single field "&msg->msg[msg->curlen]" at drivers/gpu/drm/display/drm_dp_mst_topology.c:791 (size 256) Call Trace: drm_dp_sideband_append_payload+0x324/0x350 [drm_display_helper] drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper] drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper] Cc: Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241125205314.1725887-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ac90118b9e7a8..e6ee180815b20 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -320,6 +320,9 @@ static bool drm_dp_decode_sideband_msg_hdr(const struct drm_dp_mst_topology_mgr hdr->broadcast = (buf[idx] >> 7) & 0x1; hdr->path_msg = (buf[idx] >> 6) & 0x1; hdr->msg_len = buf[idx] & 0x3f; + if (hdr->msg_len < 1) /* min space for body CRC */ + return false; + idx++; hdr->somt = (buf[idx] >> 7) & 0x1; hdr->eomt = (buf[idx] >> 6) & 0x1; From 86e8f94789dd6f3e705bfa821e1e416f97a2f863 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Wed, 20 Nov 2024 15:21:36 +0800 Subject: [PATCH 083/366] drm/sti: Add __iomem for mixer_dbg_mxn's parameter Sparse complains about incorrect type in argument 1. expected void const volatile __iomem *ptr but got void *. so modify mixer_dbg_mxn's addr parameter. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411191809.6V3c826r-lkp@intel.com/ Fixes: a5f81078a56c ("drm/sti: add debugfs entries for MIXER crtc") Signed-off-by: Pei Xiao Acked-by: Raphael Gallais-Pou Link: https://patchwork.freedesktop.org/patch/msgid/c28f0dcb6a4526721d83ba1f659bba30564d3d54.1732087094.git.xiaopei01@kylinos.cn Signed-off-by: Raphael Gallais-Pou --- drivers/gpu/drm/sti/sti_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c index 7e5f14646625b..06c1b81912f79 100644 --- a/drivers/gpu/drm/sti/sti_mixer.c +++ b/drivers/gpu/drm/sti/sti_mixer.c @@ -137,7 +137,7 @@ static void mixer_dbg_crb(struct seq_file *s, int val) } } -static void mixer_dbg_mxn(struct seq_file *s, void *addr) +static void mixer_dbg_mxn(struct seq_file *s, void __iomem *addr) { int i; From 6b64128a74ebcacc5a0de5a74834e3b9f47a354c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Nov 2024 17:18:19 +0100 Subject: [PATCH 084/366] selftests/bpf: Check for PREEMPTION instead of PREEMPT CONFIG_PREEMPT is a preemtion model the so called "Low-Latency Desktop". A different preemption model is PREEMPT_RT the so called "Real-Time". Both implement preemption in kernel and set CONFIG_PREEMPTION. There is also the so called "LAZY PREEMPT" which the "Scheduler controlled preemption model". Here we have also preemption in the kernel the rules are slightly different. Therefore the testsuite should not check for CONFIG_PREEMPT (as one model) but for CONFIG_PREEMPTION to figure out if preemption in the kernel is possible. Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20241119161819.qvEcs-n_@linutronix.de Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/map_tests/task_storage_map.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/task_local_storage.c | 2 +- .../testing/selftests/bpf/progs/read_bpf_task_storage_busy.c | 4 ++-- tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 62971dbf29961..a4121d2248ac8 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -78,8 +78,8 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "open_and_load", "error %d\n", err); /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPT) { - printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__); + if (!skel->kconfig->CONFIG_PREEMPTION) { + printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__); read_bpf_task_storage_busy__destroy(skel); skips++; return; diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 60f474d965a9f..42e822ea352f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -197,7 +197,7 @@ static void test_nodeadlock(void) /* Unnecessary recursion and deadlock detection are reproducible * in the preemptible kernel. */ - if (!skel->kconfig->CONFIG_PREEMPT) { + if (!skel->kconfig->CONFIG_PREEMPTION) { test__skip(); goto done; } diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c index 76556e0b42b24..69da05bb6c63e 100644 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -4,7 +4,7 @@ #include #include -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; extern const int bpf_task_storage_busy __ksym; char _license[] SEC("license") = "GPL"; @@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy) { int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; if (bpf_get_current_pid_tgid() >> 32 != pid) diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c index ea2dbb80f7b3e..986829aaf73a6 100644 --- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; #define EBUSY 16 #endif -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; int nr_get_errs = 0; int nr_del_errs = 0; @@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int ret, zero = 0; int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; task = bpf_get_current_task_btf(); From ca70b8baf2bd125b2a4d96e76db79375c07d7ff2 Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Wed, 16 Oct 2024 23:48:38 +0000 Subject: [PATCH 085/366] tcp_bpf: Fix the sk_mem_uncharge logic in tcp_bpf_sendmsg The current sk memory accounting logic in __SK_REDIRECT is pre-uncharging tosend bytes, which is either msg->sg.size or a smaller value apply_bytes. Potential problems with this strategy are as follows: - If the actual sent bytes are smaller than tosend, we need to charge some bytes back, as in line 487, which is okay but seems not clean. - When tosend is set to apply_bytes, as in line 417, and (ret < 0), we may miss uncharging (msg->sg.size - apply_bytes) bytes. [...] 415 tosend = msg->sg.size; 416 if (psock->apply_bytes && psock->apply_bytes < tosend) 417 tosend = psock->apply_bytes; [...] 443 sk_msg_return(sk, msg, tosend); 444 release_sock(sk); 446 origsize = msg->sg.size; 447 ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, 448 msg, tosend, flags); 449 sent = origsize - msg->sg.size; [...] 454 lock_sock(sk); 455 if (unlikely(ret < 0)) { 456 int free = sk_msg_free_nocharge(sk, msg); 458 if (!cork) 459 *copied -= free; 460 } [...] 487 if (eval == __SK_REDIRECT) 488 sk_mem_charge(sk, tosend - sent); [...] When running the selftest test_txmsg_redir_wait_sndmem with txmsg_apply, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 6 PID: 57 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x190/0x1a0 Modules linked in: CPU: 6 UID: 0 PID: 57 Comm: kworker/6:0 Not tainted 6.12.0-rc1.bm.1-amd64+ #43 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: events sk_psock_destroy RIP: 0010:inet_sock_destruct+0x190/0x1a0 RSP: 0018:ffffad0a8021fe08 EFLAGS: 00010206 RAX: 0000000000000011 RBX: ffff9aab4475b900 RCX: ffff9aab481a0800 RDX: 0000000000000303 RSI: 0000000000000011 RDI: ffff9aab4475b900 RBP: ffff9aab4475b990 R08: 0000000000000000 R09: ffff9aab40050ec0 R10: 0000000000000000 R11: ffff9aae6fdb1d01 R12: ffff9aab49c60400 R13: ffff9aab49c60598 R14: ffff9aab49c60598 R15: dead000000000100 FS: 0000000000000000(0000) GS:ffff9aae6fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffec7e47bd8 CR3: 00000001a1a1c004 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x89/0x130 ? inet_sock_destruct+0x190/0x1a0 ? report_bug+0xfc/0x1e0 ? handle_bug+0x5c/0xa0 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? inet_sock_destruct+0x190/0x1a0 __sk_destruct+0x25/0x220 sk_psock_destroy+0x2b2/0x310 process_scheduled_works+0xa3/0x3e0 worker_thread+0x117/0x240 ? __pfx_worker_thread+0x10/0x10 kthread+0xcf/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 ---[ end trace 0000000000000000 ]--- In __SK_REDIRECT, a more concise way is delaying the uncharging after sent bytes are finalized, and uncharge this value. When (ret < 0), we shall invoke sk_msg_free. Same thing happens in case __SK_DROP, when tosend is set to apply_bytes, we may miss uncharging (msg->sg.size - apply_bytes) bytes. The same warning will be reported in selftest. [...] 468 case __SK_DROP: 469 default: 470 sk_msg_free_partial(sk, msg, tosend); 471 sk_msg_apply_bytes(psock, tosend); 472 *copied -= (tosend + delta); 473 return -EACCES; [...] So instead of sk_msg_free_partial we can do sk_msg_free here. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Fixes: 8ec95b94716a ("bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues") Signed-off-by: Zijian Zhang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241016234838.3167769-3-zijianzhang@bytedance.com --- net/ipv4/tcp_bpf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 370993c03d313..99cef92e6290c 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -441,7 +441,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); release_sock(sk); origsize = msg->sg.size; @@ -453,8 +452,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, sock_put(sk_redir); lock_sock(sk); + sk_mem_uncharge(sk, sent); if (unlikely(ret < 0)) { - int free = sk_msg_free_nocharge(sk, msg); + int free = sk_msg_free(sk, msg); if (!cork) *copied -= free; @@ -468,7 +468,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, break; case __SK_DROP: default: - sk_msg_free_partial(sk, msg, tosend); + sk_msg_free(sk, msg); sk_msg_apply_bytes(psock, tosend); *copied -= (tosend + delta); return -EACCES; @@ -484,11 +484,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) { - if (eval == __SK_REDIRECT) - sk_mem_charge(sk, tosend - sent); + msg->sg.data[msg->sg.start].length) goto more_data; - } } return ret; } From 3448ad23b34e43a2526bd0f9e1221e8de876adec Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Wed, 16 Oct 2024 23:48:37 +0000 Subject: [PATCH 086/366] selftests/bpf: Add apply_bytes test to test_txmsg_redir_wait_sndmem in test_sockmap Add this to more comprehensively test the socket memory accounting logic in the __SK_REDIRECT and __SK_DROP cases of tcp_bpf_sendmsg. We don't have test when apply_bytes are not zero in test_txmsg_redir_wait_sndmem. test_send_large has opt->rate=2, it will invoke sendmsg two times. Specifically, the first sendmsg will trigger the case where the ret value of tcp_bpf_sendmsg_redir is less than 0; while the second sendmsg happens after the 3 seconds timeout, and it will trigger __SK_DROP because socket c2 has been removed from the sockmap/hash. Signed-off-by: Zijian Zhang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241016234838.3167769-2-zijianzhang@bytedance.com --- tools/testing/selftests/bpf/test_sockmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e5c7ecbe57e38..fd2da2234cc9b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1579,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt) static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt) { - txmsg_redir = 1; opt->tx_wait_mem = true; + txmsg_redir = 1; + test_send_large(opt, cgrp); + + txmsg_redir = 1; + txmsg_apply = 4097; test_send_large(opt, cgrp); opt->tx_wait_mem = false; } From ac6f420291b3fee1113f21d612fa88b628afab5b Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Thu, 21 Nov 2024 18:08:54 +0530 Subject: [PATCH 087/366] quota: flush quota_release_work upon quota writeback One of the paths quota writeback is called from is: freeze_super() sync_filesystem() ext4_sync_fs() dquot_writeback_dquots() Since we currently don't always flush the quota_release_work queue in this path, we can end up with the following race: 1. dquot are added to releasing_dquots list during regular operations. 2. FS Freeze starts, however, this does not flush the quota_release_work queue. 3. Freeze completes. 4. Kernel eventually tries to flush the workqueue while FS is frozen which hits a WARN_ON since transaction gets started during frozen state: ext4_journal_check_start+0x28/0x110 [ext4] (unreliable) __ext4_journal_start_sb+0x64/0x1c0 [ext4] ext4_release_dquot+0x90/0x1d0 [ext4] quota_release_workfn+0x43c/0x4d0 Which is the following line: WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); Which ultimately results in generic/390 failing due to dmesg noise. This was detected on powerpc machine 15 cores. To avoid this, make sure to flush the workqueue during dquot_writeback_dquots() so we dont have any pending workitems after freeze. Reported-by: Disha Goel CC: stable@vger.kernel.org Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") Reviewed-by: Baokun Li Signed-off-by: Ojaswin Mujoo Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241121123855.645335-2-ojaswin@linux.ibm.com --- fs/quota/dquot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3dd8d6f277253..f9578918cfb25 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -688,6 +688,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type) WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + flush_delayed_work("a_release_work); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; From c5566903af56dd1abb092f18dcb0c770d6cd8dcb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Nov 2024 12:46:00 +0100 Subject: [PATCH 088/366] udf: Skip parent dir link count update if corrupted If the parent directory link count is too low (likely directory inode corruption), just skip updating its link count as if it goes to 0 too early it can cause unexpected issues. Signed-off-by: Jan Kara --- fs/udf/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 78a603129dd58..2be775d30ac10 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -517,7 +517,11 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) inode->i_nlink); clear_nlink(inode); inode->i_size = 0; - inode_dec_link_count(dir); + if (dir->i_nlink >= 3) + inode_dec_link_count(dir); + else + udf_warn(inode->i_sb, "parent dir link count too low (%u)\n", + dir->i_nlink); udf_add_fid_counter(dir->i_sb, true, -1); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); From 6756af923e06aa33ad8894aaecbf9060953ba00f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Nov 2024 12:55:12 +0100 Subject: [PATCH 089/366] udf: Verify inode link counts before performing rename During rename, we are updating link counts of various inodes either when rename deletes target or when moving directory across directories. Verify involved link counts are sane so that we don't trip warnings in VFS. Reported-by: syzbot+3ff7365dc04a6bcafa66@syzkaller.appspotmail.com Signed-off-by: Jan Kara --- fs/udf/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 2be775d30ac10..2cb49b6b07168 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -791,8 +791,18 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, retval = -ENOTEMPTY; if (!empty_dir(new_inode)) goto out_oiter; + retval = -EFSCORRUPTED; + if (new_inode->i_nlink != 2) + goto out_oiter; } + retval = -EFSCORRUPTED; + if (old_dir->i_nlink < 3) + goto out_oiter; is_dir = true; + } else if (new_inode) { + retval = -EFSCORRUPTED; + if (new_inode->i_nlink < 1) + goto out_oiter; } if (is_dir && old_dir != new_dir) { retval = udf_fiiter_find_entry(old_inode, &dotdot_name, From 34851431ceca1bf457d85895bd38a4e7967e2055 Mon Sep 17 00:00:00 2001 From: Kenny Levinsen Date: Wed, 20 Nov 2024 00:53:17 +0100 Subject: [PATCH 090/366] HID: i2c-hid: Revert to using power commands to wake on resume commit 7d6f065de37c ("HID: i2c-hid: Use address probe to wake on resume") replaced the retry of power commands with the dummy read "bus probe" we use on boot which accounts for a necessary delay before retry. This made at least one Weida device (2575:0910 in an ASUS Vivobook S14) very unhappy, as the bus probe despite being successful somehow lead to the following power command failing so hard that the device never lets go of the bus. This means that even retries of the power command would fail on a timeout as the bus remains busy. Remove the bus probe on resume and instead reintroduce retry of the power command for wake-up purposes while respecting the newly established wake-up retry timings. Fixes: 7d6f065de37c ("HID: i2c-hid: Use address probe to wake on resume") Cc: stable@vger.kernel.org Reported-by: Michael Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219440 Link: https://lore.kernel.org/r/d5acb485-7377-4139-826d-4df04d21b5ed@leemhuis.info/ Signed-off-by: Kenny Levinsen Link: https://patch.msgid.link/20241119235615.23902-1-kl@kl.wtf Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-core.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 43664a24176fc..4e87380d3edd6 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -414,7 +414,19 @@ static int i2c_hid_set_power(struct i2c_hid *ihid, int power_state) i2c_hid_dbg(ihid, "%s\n", __func__); + /* + * Some STM-based devices need 400µs after a rising clock edge to wake + * from deep sleep, in which case the first request will fail due to + * the address not being acknowledged. Try after a short sleep to see + * if the device came alive on the bus. Certain Weida Tech devices also + * need this. + */ ret = i2c_hid_set_power_command(ihid, power_state); + if (ret && power_state == I2C_HID_PWR_ON) { + usleep_range(400, 500); + ret = i2c_hid_set_power_command(ihid, I2C_HID_PWR_ON); + } + if (ret) dev_err(&ihid->client->dev, "failed to change power setting.\n"); @@ -976,14 +988,6 @@ static int i2c_hid_core_resume(struct i2c_hid *ihid) enable_irq(client->irq); - /* Make sure the device is awake on the bus */ - ret = i2c_hid_probe_address(ihid); - if (ret < 0) { - dev_err(&client->dev, "nothing at address after resume: %d\n", - ret); - return -ENXIO; - } - /* On Goodix 27c6:0d42 wait extra time before device wakeup. * It's not clear why but if we send wakeup too early, the device will * never trigger input interrupts. From 214093534f3c046bf5acc9affbf4e6bd9af4538b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 20 Nov 2024 16:06:22 +0100 Subject: [PATCH 091/366] xfs: Use xchg() in xlog_cil_insert_pcp_aggregate() try_cmpxchg() loop with constant "new" value can be substituted with just xchg() to atomically get and clear the location. The code on x86_64 improves from: 1e7f: 48 89 4c 24 10 mov %rcx,0x10(%rsp) 1e84: 48 03 14 c5 00 00 00 add 0x0(,%rax,8),%rdx 1e8b: 00 1e88: R_X86_64_32S __per_cpu_offset 1e8c: 8b 02 mov (%rdx),%eax 1e8e: 41 89 c5 mov %eax,%r13d 1e91: 31 c9 xor %ecx,%ecx 1e93: f0 0f b1 0a lock cmpxchg %ecx,(%rdx) 1e97: 75 f5 jne 1e8e 1e99: 48 8b 4c 24 10 mov 0x10(%rsp),%rcx 1e9e: 45 01 e9 add %r13d,%r9d to just: 1e7f: 48 03 14 cd 00 00 00 add 0x0(,%rcx,8),%rdx 1e86: 00 1e83: R_X86_64_32S __per_cpu_offset 1e87: 31 c9 xor %ecx,%ecx 1e89: 87 0a xchg %ecx,(%rdx) 1e8b: 41 01 cb add %ecx,%r11d No functional change intended. Signed-off-by: Uros Bizjak Cc: Chandan Babu R Cc: Darrick J. Wong Cc: Christoph Hellwig Cc: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log_cil.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 2e9157b650e64..1ca406ec1b40b 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -171,11 +171,8 @@ xlog_cil_insert_pcp_aggregate( */ for_each_cpu(cpu, &ctx->cil_pcpmask) { struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); - int old = READ_ONCE(cilpcp->space_used); - while (!try_cmpxchg(&cilpcp->space_used, &old, 0)) - ; - count += old; + count += xchg(&cilpcp->space_used, 0); } atomic_add(count, &ctx->space_used); } From cc2dba08cc33daf8acd6e560957ef0e0f4d034ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Nov 2024 13:31:06 +0100 Subject: [PATCH 092/366] xfs: don't call xfs_bmap_same_rtgroup in xfs_bmap_add_extent_hole_delay xfs_bmap_add_extent_hole_delay works entirely on delalloc extents, for which xfs_bmap_same_rtgroup doesn't make sense. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_bmap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9052839305e2c..5255f93bae31f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2620,8 +2620,7 @@ xfs_bmap_add_extent_hole_delay( */ if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && - xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && @@ -2629,8 +2628,7 @@ xfs_bmap_add_extent_hole_delay( new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) && - xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) state |= BMAP_RIGHT_CONTIG; /* From 146b6f1112eb30a19776d6c323c994e9d67790db Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Sat, 23 Nov 2024 03:42:56 -0600 Subject: [PATCH 093/366] ipvs: fix UB due to uninitialized stack access in ip_vs_protocol_init() Under certain kernel configurations when building with Clang/LLVM, the compiler does not generate a return or jump as the terminator instruction for ip_vs_protocol_init(), triggering the following objtool warning during build time: vmlinux.o: warning: objtool: ip_vs_protocol_init() falls through to next function __initstub__kmod_ip_vs_rr__935_123_ip_vs_rr_init6() At runtime, this either causes an oops when trying to load the ipvs module or a boot-time panic if ipvs is built-in. This same issue has been reported by the Intel kernel test robot previously. Digging deeper into both LLVM and the kernel code reveals this to be a undefined behavior problem. ip_vs_protocol_init() uses a on-stack buffer of 64 chars to store the registered protocol names and leaves it uninitialized after definition. The function calls strnlen() when concatenating protocol names into the buffer. With CONFIG_FORTIFY_SOURCE strnlen() performs an extra step to check whether the last byte of the input char buffer is a null character (commit 3009f891bb9f ("fortify: Allow strlen() and strnlen() to pass compile-time known lengths")). This, together with possibly other configurations, cause the following IR to be generated: define hidden i32 @ip_vs_protocol_init() local_unnamed_addr #5 section ".init.text" align 16 !kcfi_type !29 { %1 = alloca [64 x i8], align 16 ... 14: ; preds = %11 %15 = getelementptr inbounds i8, ptr %1, i64 63 %16 = load i8, ptr %15, align 1 %17 = tail call i1 @llvm.is.constant.i8(i8 %16) %18 = icmp eq i8 %16, 0 %19 = select i1 %17, i1 %18, i1 false br i1 %19, label %20, label %23 20: ; preds = %14 %21 = call i64 @strlen(ptr noundef nonnull dereferenceable(1) %1) #23 ... 23: ; preds = %14, %11, %20 %24 = call i64 @strnlen(ptr noundef nonnull dereferenceable(1) %1, i64 noundef 64) #24 ... } The above code calculates the address of the last char in the buffer (value %15) and then loads from it (value %16). Because the buffer is never initialized, the LLVM GVN pass marks value %16 as undefined: %13 = getelementptr inbounds i8, ptr %1, i64 63 br i1 undef, label %14, label %17 This gives later passes (SCCP, in particular) more DCE opportunities by propagating the undef value further, and eventually removes everything after the load on the uninitialized stack location: define hidden i32 @ip_vs_protocol_init() local_unnamed_addr #0 section ".init.text" align 16 !kcfi_type !11 { %1 = alloca [64 x i8], align 16 ... 12: ; preds = %11 %13 = getelementptr inbounds i8, ptr %1, i64 63 unreachable } In this way, the generated native code will just fall through to the next function, as LLVM does not generate any code for the unreachable IR instruction and leaves the function without a terminator. Zero the on-stack buffer to avoid this possible UB. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402100205.PWXIz1ZK-lkp@intel.com/ Co-developed-by: Ruowen Qin Signed-off-by: Ruowen Qin Signed-off-by: Jinghao Jia Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index f100da4ba3bc3..a9fd1d3fc2cbf 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -340,7 +340,7 @@ void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) int __init ip_vs_protocol_init(void) { - char protocols[64]; + char protocols[64] = { 0 }; #define REGISTER_PROTOCOL(p) \ do { \ register_ip_vs_protocol(p); \ @@ -348,8 +348,6 @@ int __init ip_vs_protocol_init(void) strcat(protocols, (p)->name); \ } while (0) - protocols[0] = '\0'; - protocols[2] = '\0'; #ifdef CONFIG_IP_VS_PROTO_TCP REGISTER_PROTOCOL(&ip_vs_protocol_tcp); #endif From 04317f4eb2aad312ad85c1a17ad81fe75f1f9bc7 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 21 Nov 2024 09:55:42 +0300 Subject: [PATCH 094/366] netfilter: x_tables: fix LED ID check in led_tg_check() Syzbot has reported the following BUG detected by KASAN: BUG: KASAN: slab-out-of-bounds in strlen+0x58/0x70 Read of size 1 at addr ffff8881022da0c8 by task repro/5879 ... Call Trace: dump_stack_lvl+0x241/0x360 ? __pfx_dump_stack_lvl+0x10/0x10 ? __pfx__printk+0x10/0x10 ? _printk+0xd5/0x120 ? __virt_addr_valid+0x183/0x530 ? __virt_addr_valid+0x183/0x530 print_report+0x169/0x550 ? __virt_addr_valid+0x183/0x530 ? __virt_addr_valid+0x183/0x530 ? __virt_addr_valid+0x45f/0x530 ? __phys_addr+0xba/0x170 ? strlen+0x58/0x70 kasan_report+0x143/0x180 ? strlen+0x58/0x70 strlen+0x58/0x70 kstrdup+0x20/0x80 led_tg_check+0x18b/0x3c0 xt_check_target+0x3bb/0xa40 ? __pfx_xt_check_target+0x10/0x10 ? stack_depot_save_flags+0x6e4/0x830 ? nft_target_init+0x174/0xc30 nft_target_init+0x82d/0xc30 ? __pfx_nft_target_init+0x10/0x10 ? nf_tables_newrule+0x1609/0x2980 ? nf_tables_newrule+0x1609/0x2980 ? rcu_is_watching+0x15/0xb0 ? nf_tables_newrule+0x1609/0x2980 ? nf_tables_newrule+0x1609/0x2980 ? __kmalloc_noprof+0x21a/0x400 nf_tables_newrule+0x1860/0x2980 ? __pfx_nf_tables_newrule+0x10/0x10 ? __nla_parse+0x40/0x60 nfnetlink_rcv+0x14e5/0x2ab0 ? __pfx_validate_chain+0x10/0x10 ? __pfx_nfnetlink_rcv+0x10/0x10 ? __lock_acquire+0x1384/0x2050 ? netlink_deliver_tap+0x2e/0x1b0 ? __pfx_lock_release+0x10/0x10 ? netlink_deliver_tap+0x2e/0x1b0 netlink_unicast+0x7f8/0x990 ? __pfx_netlink_unicast+0x10/0x10 ? __virt_addr_valid+0x183/0x530 ? __check_object_size+0x48e/0x900 netlink_sendmsg+0x8e4/0xcb0 ? __pfx_netlink_sendmsg+0x10/0x10 ? aa_sock_msg_perm+0x91/0x160 ? __pfx_netlink_sendmsg+0x10/0x10 __sock_sendmsg+0x223/0x270 ____sys_sendmsg+0x52a/0x7e0 ? __pfx_____sys_sendmsg+0x10/0x10 __sys_sendmsg+0x292/0x380 ? __pfx___sys_sendmsg+0x10/0x10 ? lockdep_hardirqs_on_prepare+0x43d/0x780 ? __pfx_lockdep_hardirqs_on_prepare+0x10/0x10 ? exc_page_fault+0x590/0x8c0 ? do_syscall_64+0xb6/0x230 do_syscall_64+0xf3/0x230 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... Since an invalid (without '\0' byte at all) byte sequence may be passed from userspace, add an extra check to ensure that such a sequence is rejected as possible ID and so never passed to 'kstrdup()' and further. Reported-by: syzbot+6c8215822f35fdb35667@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6c8215822f35fdb35667 Fixes: 268cb38e1802 ("netfilter: x_tables: add LED trigger target") Signed-off-by: Dmitry Antipov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LED.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index f7b0286d106ac..8a80fd76fe45b 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -96,7 +96,9 @@ static int led_tg_check(const struct xt_tgchk_param *par) struct xt_led_info_internal *ledinternal; int err; - if (ledinfo->id[0] == '\0') + /* Bail out if empty string or not a string at all. */ + if (ledinfo->id[0] == '\0' || + !memchr(ledinfo->id, '\0', sizeof(ledinfo->id))) return -EINVAL; mutex_lock(&xt_led_mutex); From b7529880cb961d515642ce63f9d7570869bbbdc3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 Nov 2024 11:59:06 +0100 Subject: [PATCH 095/366] netfilter: nft_socket: remove WARN_ON_ONCE on maximum cgroup level cgroup maximum depth is INT_MAX by default, there is a cgroup toggle to restrict this maximum depth to a more reasonable value not to harm performance. Remove unnecessary WARN_ON_ONCE which is reachable from userspace. Fixes: 7f3287db6543 ("netfilter: nft_socket: make cgroupsv2 matching work with namespaces") Reported-by: syzbot+57bac0866ddd99fe47c0@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index f5da0c1775f2e..35d0409b00950 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -68,7 +68,7 @@ static noinline int nft_socket_cgroup_subtree_level(void) cgroup_put(cgrp); - if (WARN_ON_ONCE(level > 255)) + if (level > 255) return -ERANGE; if (WARN_ON_ONCE(level < 0)) From 59548215b76be98cf3422eea9a67d6ea578aca3d Mon Sep 17 00:00:00 2001 From: WangYuli Date: Mon, 25 Nov 2024 13:26:16 +0800 Subject: [PATCH 096/366] HID: wacom: fix when get product name maybe null pointer Due to incorrect dev->product reporting by certain devices, null pointer dereferences occur when dev->product is empty, leading to potential system crashes. This issue was found on EXCELSIOR DL37-D05 device with Loongson-LS3A6000-7A2000-DL37 motherboard. Kernel logs: [ 56.470885] usb 4-3: new full-speed USB device number 4 using ohci-pci [ 56.671638] usb 4-3: string descriptor 0 read error: -22 [ 56.671644] usb 4-3: New USB device found, idVendor=056a, idProduct=0374, bcdDevice= 1.07 [ 56.671647] usb 4-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 56.678839] hid-generic 0003:056A:0374.0004: hiddev0,hidraw3: USB HID v1.10 Device [HID 056a:0374] on usb-0000:00:05.0-3/input0 [ 56.697719] CPU 2 Unable to handle kernel paging request at virtual address 0000000000000000, era == 90000000066e35c8, ra == ffff800004f98a80 [ 56.697732] Oops[#1]: [ 56.697734] CPU: 2 PID: 2742 Comm: (udev-worker) Tainted: G OE 6.6.0-loong64-desktop #25.00.2000.015 [ 56.697737] Hardware name: Inspur CE520L2/C09901N000000000, BIOS 2.09.00 10/11/2024 [ 56.697739] pc 90000000066e35c8 ra ffff800004f98a80 tp 9000000125478000 sp 900000012547b8a0 [ 56.697741] a0 0000000000000000 a1 ffff800004818b28 a2 0000000000000000 a3 0000000000000000 [ 56.697743] a4 900000012547b8f0 a5 0000000000000000 a6 0000000000000000 a7 0000000000000000 [ 56.697745] t0 ffff800004818b2d t1 0000000000000000 t2 0000000000000003 t3 0000000000000005 [ 56.697747] t4 0000000000000000 t5 0000000000000000 t6 0000000000000000 t7 0000000000000000 [ 56.697748] t8 0000000000000000 u0 0000000000000000 s9 0000000000000000 s0 900000011aa48028 [ 56.697750] s1 0000000000000000 s2 0000000000000000 s3 ffff800004818e80 s4 ffff800004810000 [ 56.697751] s5 90000001000b98d0 s6 ffff800004811f88 s7 ffff800005470440 s8 0000000000000000 [ 56.697753] ra: ffff800004f98a80 wacom_update_name+0xe0/0x300 [wacom] [ 56.697802] ERA: 90000000066e35c8 strstr+0x28/0x120 [ 56.697806] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 56.697816] PRMD: 0000000c (PPLV0 +PIE +PWE) [ 56.697821] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) [ 56.697827] ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) [ 56.697831] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 56.697835] BADV: 0000000000000000 [ 56.697836] PRID: 0014d000 (Loongson-64bit, Loongson-3A6000) [ 56.697838] Modules linked in: wacom(+) bnep bluetooth rfkill qrtr nls_iso8859_1 nls_cp437 snd_hda_codec_conexant snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd soundcore input_leds mousedev led_class joydev deepin_netmonitor(OE) fuse nfnetlink dmi_sysfs ip_tables x_tables overlay amdgpu amdxcp drm_exec gpu_sched drm_buddy radeon drm_suballoc_helper i2c_algo_bit drm_ttm_helper r8169 ttm drm_display_helper spi_loongson_pci xhci_pci cec xhci_pci_renesas spi_loongson_core hid_generic realtek gpio_loongson_64bit [ 56.697887] Process (udev-worker) (pid: 2742, threadinfo=00000000aee0d8b4, task=00000000a9eff1f3) [ 56.697890] Stack : 0000000000000000 ffff800004817e00 0000000000000000 0000251c00000000 [ 56.697896] 0000000000000000 00000011fffffffd 0000000000000000 0000000000000000 [ 56.697901] 0000000000000000 1b67a968695184b9 0000000000000000 90000001000b98d0 [ 56.697906] 90000001000bb8d0 900000011aa48028 0000000000000000 ffff800004f9d74c [ 56.697911] 90000001000ba000 ffff800004f9ce58 0000000000000000 ffff800005470440 [ 56.697916] ffff800004811f88 90000001000b98d0 9000000100da2aa8 90000001000bb8d0 [ 56.697921] 0000000000000000 90000001000ba000 900000011aa48028 ffff800004f9d74c [ 56.697926] ffff8000054704e8 90000001000bb8b8 90000001000ba000 0000000000000000 [ 56.697931] 90000001000bb8d0 9000000006307564 9000000005e666e0 90000001752359b8 [ 56.697936] 9000000008cbe400 900000000804d000 9000000005e666e0 0000000000000000 [ 56.697941] ... [ 56.697944] Call Trace: [ 56.697945] [<90000000066e35c8>] strstr+0x28/0x120 [ 56.697950] [] wacom_update_name+0xe0/0x300 [wacom] [ 56.698000] [] wacom_parse_and_register+0x338/0x900 [wacom] [ 56.698050] [] wacom_probe+0x32c/0x420 [wacom] [ 56.698099] [<9000000006307564>] hid_device_probe+0x144/0x260 [ 56.698103] [<9000000005e65d68>] really_probe+0x208/0x540 [ 56.698109] [<9000000005e661dc>] __driver_probe_device+0x13c/0x1e0 [ 56.698112] [<9000000005e66620>] driver_probe_device+0x40/0x100 [ 56.698116] [<9000000005e6680c>] __device_attach_driver+0x12c/0x180 [ 56.698119] [<9000000005e62bc8>] bus_for_each_drv+0x88/0x160 [ 56.698123] [<9000000005e66468>] __device_attach+0x108/0x260 [ 56.698126] [<9000000005e63918>] device_reprobe+0x78/0x100 [ 56.698129] [<9000000005e62a68>] bus_for_each_dev+0x88/0x160 [ 56.698132] [<9000000006304e54>] __hid_bus_driver_added+0x34/0x80 [ 56.698134] [<9000000005e62bc8>] bus_for_each_drv+0x88/0x160 [ 56.698137] [<9000000006304df0>] __hid_register_driver+0x70/0xa0 [ 56.698142] [<9000000004e10fe4>] do_one_initcall+0x104/0x320 [ 56.698146] [<9000000004f38150>] do_init_module+0x90/0x2c0 [ 56.698151] [<9000000004f3a3d8>] init_module_from_file+0xb8/0x120 [ 56.698155] [<9000000004f3a590>] idempotent_init_module+0x150/0x3a0 [ 56.698159] [<9000000004f3a890>] sys_finit_module+0xb0/0x140 [ 56.698163] [<900000000671e4e8>] do_syscall+0x88/0xc0 [ 56.698166] [<9000000004e12404>] handle_syscall+0xc4/0x160 [ 56.698171] Code: 0011958f 00150224 5800cd85 <2a00022c> 00150004 4000c180 0015022c 03400000 03400000 [ 56.698192] ---[ end trace 0000000000000000 ]--- Fixes: 09dc28acaec7 ("HID: wacom: Improve generic name generation") Reported-by: Zhenxing Chen Co-developed-by: Xu Rao Signed-off-by: Xu Rao Signed-off-by: WangYuli Link: https://patch.msgid.link/B31757FE8E1544CF+20241125052616.18261-1-wangyuli@uniontech.com Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/wacom_sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 2bc45b24075c3..9843b52bd017a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2241,7 +2241,8 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); - product_name = dev->product; + if (dev->product != NULL) + product_name = dev->product; } if (wacom->hdev->bus == BUS_I2C) { From e8f34747bddedaf3895e5d5066e0f71713fff811 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Tue, 26 Nov 2024 13:58:50 +0000 Subject: [PATCH 097/366] selftests: hid: fix typo and exit code The correct exit code to mark a test as skipped is 4. Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests") Signed-off-by: Maximilian Heyne Link: https://patch.msgid.link/20241126135850.76493-1-mheyne@amazon.de Signed-off-by: Benjamin Tissoires --- .../testing/selftests/hid/run-hid-tools-tests.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/hid/run-hid-tools-tests.sh b/tools/testing/selftests/hid/run-hid-tools-tests.sh index bdae8464da865..af1682a53c27e 100755 --- a/tools/testing/selftests/hid/run-hid-tools-tests.sh +++ b/tools/testing/selftests/hid/run-hid-tools-tests.sh @@ -2,24 +2,26 @@ # SPDX-License-Identifier: GPL-2.0 # Runs tests for the HID subsystem +KSELFTEST_SKIP_TEST=4 + if ! command -v python3 > /dev/null 2>&1; then echo "hid-tools: [SKIP] python3 not installed" - exit 77 + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest module not installed" - exit 77 + echo "hid: [SKIP] pytest module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest_tap" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest_tap module not installed" - exit 77 + echo "hid: [SKIP] pytest_tap module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import hidtools" > /dev/null 2>&1; then - echo "hid: [SKIP/ hid-tools module not installed" - exit 77 + echo "hid: [SKIP] hid-tools module not installed" + exit $KSELFTEST_SKIP_TEST fi TARGET=${TARGET:=.} From f9a11da1d92f78279afafdc811214b88cfe54a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 27 Nov 2024 17:41:56 +0100 Subject: [PATCH 098/366] HID: bpf: constify hid_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hid_ops struct is never modified. Mark it as const. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20241127-hid-bpf-ops-v1-1-f9e41bfa3afd@weissschuh.net Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 2 +- drivers/hid/hid-core.c | 2 +- include/linux/hid_bpf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 961b7f35aa673..e8be9ab51d632 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -19,7 +19,7 @@ #include #include "hid_bpf_dispatch.h" -struct hid_ops *hid_ops; +const struct hid_ops *hid_ops; EXPORT_SYMBOL(hid_ops); u8 * diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 98bef39642a9e..33a1919733248 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -3064,7 +3064,7 @@ int hid_check_keys_pressed(struct hid_device *hid) EXPORT_SYMBOL_GPL(hid_check_keys_pressed); #ifdef CONFIG_HID_BPF -static struct hid_ops __hid_ops = { +static const struct hid_ops __hid_ops = { .hid_get_report = hid_get_report, .hid_hw_raw_request = __hid_hw_raw_request, .hid_hw_output_report = __hid_hw_output_report, diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a6876ab290048..a2e47dbcf82c8 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -78,7 +78,7 @@ struct hid_ops { const struct bus_type *bus_type; }; -extern struct hid_ops *hid_ops; +extern const struct hid_ops *hid_ops; /** * struct hid_bpf_ops - A BPF struct_ops of callbacks allowing to attach HID-BPF From 0b1b0c112437d7547a6588009e08face31b22c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 27 Nov 2024 17:42:47 +0100 Subject: [PATCH 099/366] HID: bpf: drop unneeded casts discarding const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 33c0fb85b571 ("HID: bpf: make part of struct hid_device writable") the const qualifier was dropped from struct hid_bpf_ctx::hid. The casts are now unnecessary. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20241127-hid-bpf-cast-v1-1-f26424960e84@weissschuh.net Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index e8be9ab51d632..2e96ec6a3073d 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -352,7 +352,6 @@ __hid_bpf_hw_check_params(struct hid_bpf_ctx *ctx, __u8 *buf, size_t *buf__sz, { struct hid_report_enum *report_enum; struct hid_report *report; - struct hid_device *hdev; u32 report_len; /* check arguments */ @@ -371,9 +370,7 @@ __hid_bpf_hw_check_params(struct hid_bpf_ctx *ctx, __u8 *buf, size_t *buf__sz, if (*buf__sz < 1) return -EINVAL; - hdev = (struct hid_device *)ctx->hid; /* discard const */ - - report_enum = hdev->report_enum + rtype; + report_enum = ctx->hid->report_enum + rtype; report = hid_ops->hid_get_report(report_enum, buf); if (!report) return -EINVAL; @@ -402,7 +399,6 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, enum hid_report_type rtype, enum hid_class_request reqtype) { struct hid_bpf_ctx_kern *ctx_kern; - struct hid_device *hdev; size_t size = buf__sz; u8 *dma_data; int ret; @@ -429,13 +425,11 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, return -EINVAL; } - hdev = (struct hid_device *)ctx->hid; /* discard const */ - dma_data = kmemdup(buf, size, GFP_KERNEL); if (!dma_data) return -ENOMEM; - ret = hid_ops->hid_hw_raw_request(hdev, + ret = hid_ops->hid_hw_raw_request(ctx->hid, dma_data[0], dma_data, size, @@ -464,7 +458,6 @@ __bpf_kfunc int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz) { struct hid_bpf_ctx_kern *ctx_kern; - struct hid_device *hdev; size_t size = buf__sz; u8 *dma_data; int ret; @@ -478,13 +471,11 @@ hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz) if (ret) return ret; - hdev = (struct hid_device *)ctx->hid; /* discard const */ - dma_data = kmemdup(buf, size, GFP_KERNEL); if (!dma_data) return -ENOMEM; - ret = hid_ops->hid_hw_output_report(hdev, dma_data, size, (u64)(long)ctx, true); + ret = hid_ops->hid_hw_output_report(ctx->hid, dma_data, size, (u64)(long)ctx, true); kfree(dma_data); return ret; From 05b36b04d74a517d6675bf2f90829ff1ac7e28dc Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 13 Nov 2024 18:16:48 +0100 Subject: [PATCH 100/366] btrfs: fix use-after-free in btrfs_encoded_read_endio() Shinichiro reported the following use-after free that sometimes is happening in our CI system when running fstests' btrfs/284 on a TCMU runner device: BUG: KASAN: slab-use-after-free in lock_release+0x708/0x780 Read of size 8 at addr ffff888106a83f18 by task kworker/u80:6/219 CPU: 8 UID: 0 PID: 219 Comm: kworker/u80:6 Not tainted 6.12.0-rc6-kts+ #15 Hardware name: Supermicro Super Server/X11SPi-TF, BIOS 3.3 02/21/2020 Workqueue: btrfs-endio btrfs_end_bio_work [btrfs] Call Trace: dump_stack_lvl+0x6e/0xa0 ? lock_release+0x708/0x780 print_report+0x174/0x505 ? lock_release+0x708/0x780 ? __virt_addr_valid+0x224/0x410 ? lock_release+0x708/0x780 kasan_report+0xda/0x1b0 ? lock_release+0x708/0x780 ? __wake_up+0x44/0x60 lock_release+0x708/0x780 ? __pfx_lock_release+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? lock_is_held_type+0x9a/0x110 _raw_spin_unlock_irqrestore+0x1f/0x60 __wake_up+0x44/0x60 btrfs_encoded_read_endio+0x14b/0x190 [btrfs] btrfs_check_read_bio+0x8d9/0x1360 [btrfs] ? lock_release+0x1b0/0x780 ? trace_lock_acquire+0x12f/0x1a0 ? __pfx_btrfs_check_read_bio+0x10/0x10 [btrfs] ? process_one_work+0x7e3/0x1460 ? lock_acquire+0x31/0xc0 ? process_one_work+0x7e3/0x1460 process_one_work+0x85c/0x1460 ? __pfx_process_one_work+0x10/0x10 ? assign_work+0x16c/0x240 worker_thread+0x5e6/0xfc0 ? __pfx_worker_thread+0x10/0x10 kthread+0x2c3/0x3a0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Allocated by task 3661: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 __kasan_kmalloc+0xaa/0xb0 btrfs_encoded_read_regular_fill_pages+0x16c/0x6d0 [btrfs] send_extent_data+0xf0f/0x24a0 [btrfs] process_extent+0x48a/0x1830 [btrfs] changed_cb+0x178b/0x2ea0 [btrfs] btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs] _btrfs_ioctl_send+0x117/0x330 [btrfs] btrfs_ioctl+0x184a/0x60a0 [btrfs] __x64_sys_ioctl+0x12e/0x1a0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Freed by task 3661: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x70 __kasan_slab_free+0x4f/0x70 kfree+0x143/0x490 btrfs_encoded_read_regular_fill_pages+0x531/0x6d0 [btrfs] send_extent_data+0xf0f/0x24a0 [btrfs] process_extent+0x48a/0x1830 [btrfs] changed_cb+0x178b/0x2ea0 [btrfs] btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs] _btrfs_ioctl_send+0x117/0x330 [btrfs] btrfs_ioctl+0x184a/0x60a0 [btrfs] __x64_sys_ioctl+0x12e/0x1a0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e The buggy address belongs to the object at ffff888106a83f00 which belongs to the cache kmalloc-rnd-07-96 of size 96 The buggy address is located 24 bytes inside of freed 96-byte region [ffff888106a83f00, ffff888106a83f60) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888106a83800 pfn:0x106a83 flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) page_type: f5(slab) raw: 0017ffffc0000000 ffff888100053680 ffffea0004917200 0000000000000004 raw: ffff888106a83800 0000000080200019 00000001f5000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888106a83e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888106a83e80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc >ffff888106a83f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888106a83f80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888106a84000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Further analyzing the trace and the crash dump's vmcore file shows that the wake_up() call in btrfs_encoded_read_endio() is calling wake_up() on the wait_queue that is in the private data passed to the end_io handler. Commit 4ff47df40447 ("btrfs: move priv off stack in btrfs_encoded_read_regular_fill_pages()") moved 'struct btrfs_encoded_read_private' off the stack. Before that commit one can see a corruption of the private data when analyzing the vmcore after a crash: *(struct btrfs_encoded_read_private *)0xffff88815626eec8 = { .wait = (wait_queue_head_t){ .lock = (spinlock_t){ .rlock = (struct raw_spinlock){ .raw_lock = (arch_spinlock_t){ .val = (atomic_t){ .counter = (int)-2005885696, }, .locked = (u8)0, .pending = (u8)157, .locked_pending = (u16)40192, .tail = (u16)34928, }, .magic = (unsigned int)536325682, .owner_cpu = (unsigned int)29, .owner = (void *)__SCT__tp_func_btrfs_transaction_commit+0x0 = 0x0, .dep_map = (struct lockdep_map){ .key = (struct lock_class_key *)0xffff8881575a3b6c, .class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 }, .name = (const char *)0xffff88815626f100 = "", .wait_type_outer = (u8)37, .wait_type_inner = (u8)178, .lock_type = (u8)154, }, }, .__padding = (u8 [24]){ 0, 157, 112, 136, 50, 174, 247, 31, 29 }, .dep_map = (struct lockdep_map){ .key = (struct lock_class_key *)0xffff8881575a3b6c, .class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 }, .name = (const char *)0xffff88815626f100 = "", .wait_type_outer = (u8)37, .wait_type_inner = (u8)178, .lock_type = (u8)154, }, }, .head = (struct list_head){ .next = (struct list_head *)0x112cca, .prev = (struct list_head *)0x47, }, }, .pending = (atomic_t){ .counter = (int)-1491499288, }, .status = (blk_status_t)130, } Here we can see several indicators of in-memory data corruption, e.g. the large negative atomic values of ->pending or ->wait->lock->rlock->raw_lock->val, as well as the bogus spinlock magic 0x1ff7ae32 (decimal 536325682 above) instead of 0xdead4ead or the bogus pointer values for ->wait->head. To fix this, change atomic_dec_return() to atomic_dec_and_test() to fix the corruption, as atomic_dec_return() is defined as two instructions on x86_64, whereas atomic_dec_and_test() is defined as a single atomic operation. This can lead to a situation where counter value is already decremented but the if statement in btrfs_encoded_read_endio() is not completely processed, i.e. the 0 test has not completed. If another thread continues executing btrfs_encoded_read_regular_fill_pages() the atomic_dec_return() there can see an already updated ->pending counter and continues by freeing the private data. Continuing in the endio handler the test for 0 succeeds and the wait_queue is woken up, resulting in a use-after-free. Reported-by: Shinichiro Kawasaki Suggested-by: Damien Le Moal Fixes: 1881fba89bd5 ("btrfs: add BTRFS_IOC_ENCODED_READ ioctl") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22b8e2764619f..fdad1adee1a33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9089,7 +9089,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) */ WRITE_ONCE(priv->status, bbio->bio.bi_status); } - if (atomic_dec_return(&priv->pending) == 0) { + if (atomic_dec_and_test(&priv->pending)) { int err = blk_status_to_errno(READ_ONCE(priv->status)); if (priv->uring_ctx) { From 7d6872ccbd56c310d2b9658ecaeafeda258727b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 Nov 2024 12:11:26 +0000 Subject: [PATCH 101/366] btrfs: fix deadlock between transaction commits and extent locks When running a workload with fsstress and duperemove (generic/561) we can hit a deadlock related to transaction commits and locking extent ranges, as described below. Task A hanging during a transaction commit, waiting for all other writers to complete: [178317.334817] INFO: task fsstress:555623 blocked for more than 120 seconds. [178317.335693] Not tainted 6.12.0-rc6-btrfs-next-179+ #1 [178317.336528] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [178317.337673] task:fsstress state:D stack:0 pid:555623 tgid:555623 ppid:555620 flags:0x00004002 [178317.337679] Call Trace: [178317.337681] [178317.337685] __schedule+0x364/0xbe0 [178317.337691] schedule+0x26/0xa0 [178317.337695] btrfs_commit_transaction+0x5c5/0x1050 [btrfs] [178317.337769] ? start_transaction+0xc4/0x800 [btrfs] [178317.337815] ? __pfx_autoremove_wake_function+0x10/0x10 [178317.337819] btrfs_mksubvol+0x381/0x640 [btrfs] [178317.337878] btrfs_mksnapshot+0x7a/0xb0 [btrfs] [178317.337935] __btrfs_ioctl_snap_create+0x1bb/0x1d0 [btrfs] [178317.337995] btrfs_ioctl_snap_create_v2+0x103/0x130 [btrfs] [178317.338053] btrfs_ioctl+0x29b/0x2a90 [btrfs] [178317.338118] ? kmem_cache_alloc_noprof+0x5f/0x2c0 [178317.338126] ? getname_flags+0x45/0x1f0 [178317.338133] ? _raw_spin_unlock+0x15/0x30 [178317.338145] ? __x64_sys_ioctl+0x88/0xc0 [178317.338149] __x64_sys_ioctl+0x88/0xc0 [178317.338152] do_syscall_64+0x4a/0x110 [178317.338160] entry_SYSCALL_64_after_hwframe+0x76/0x7e [178317.338190] RIP: 0033:0x7f13c28e271b Which corresponds to line 2361 of transaction.c: $ cat -n fs/btrfs/transaction.c (...) 2162 int btrfs_commit_transaction(struct btrfs_trans_handle *trans) 2163 { (...) 2349 spin_lock(&fs_info->trans_lock); 2350 add_pending_snapshot(trans); 2351 cur_trans->state = TRANS_STATE_COMMIT_DOING; 2352 spin_unlock(&fs_info->trans_lock); 2353 2354 /* 2355 * The thread has started/joined the transaction thus it holds the 2356 * lockdep map as a reader. It has to release it before acquiring the 2357 * lockdep map as a writer. 2358 */ 2359 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); 2360 btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers); 2361 wait_event(cur_trans->writer_wait, 2362 atomic_read(&cur_trans->num_writers) == 1); (...) The transaction is in the TRANS_STATE_COMMIT_DOING state and so it's waiting for all other existing writers to complete and release their transaction handle. Task B is running ordered extent completion and blocked waiting to lock an extent range in an inode's io tree: [178317.327411] INFO: task kworker/u48:8:554545 blocked for more than 120 seconds. [178317.328630] Not tainted 6.12.0-rc6-btrfs-next-179+ #1 [178317.329635] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [178317.330872] task:kworker/u48:8 state:D stack:0 pid:554545 tgid:554545 ppid:2 flags:0x00004000 [178317.330878] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] [178317.330944] Call Trace: [178317.330945] [178317.330947] __schedule+0x364/0xbe0 [178317.330952] schedule+0x26/0xa0 [178317.330955] __lock_extent+0x337/0x3a0 [btrfs] [178317.331014] ? __pfx_autoremove_wake_function+0x10/0x10 [178317.331017] btrfs_finish_one_ordered+0x47a/0xaa0 [btrfs] [178317.331074] ? psi_group_change+0x132/0x2d0 [178317.331078] btrfs_work_helper+0xbd/0x370 [btrfs] [178317.331140] process_scheduled_works+0xd3/0x460 [178317.331144] ? __pfx_worker_thread+0x10/0x10 [178317.331146] worker_thread+0x121/0x250 [178317.331149] ? __pfx_worker_thread+0x10/0x10 [178317.331151] kthread+0xe9/0x120 [178317.331154] ? __pfx_kthread+0x10/0x10 [178317.331157] ret_from_fork+0x2d/0x50 [178317.331159] ? __pfx_kthread+0x10/0x10 [178317.331162] ret_from_fork_asm+0x1a/0x30 This extent range locking happens after joining the current transaction, so task A is waiting for task B to release its transaction handle (decrementing the transaction's num_writers counter). Task C while doing a fiemap it tries to join the current transaction: [242682.812815] task:pool state:D stack:0 pid:560767 tgid:560724 ppid:555622 flags:0x00004006 [242682.812827] Call Trace: [242682.812856] [242682.812864] __schedule+0x364/0xbe0 [242682.812879] ? _raw_spin_unlock_irqrestore+0x23/0x40 [242682.812897] schedule+0x26/0xa0 [242682.812909] wait_current_trans+0xd6/0x130 [btrfs] [242682.813148] ? __pfx_autoremove_wake_function+0x10/0x10 [242682.813162] start_transaction+0x3d4/0x800 [btrfs] [242682.813399] btrfs_is_data_extent_shared+0xd2/0x440 [btrfs] [242682.813723] fiemap_process_hole+0x2a2/0x300 [btrfs] [242682.813995] extent_fiemap+0x9b8/0xb80 [btrfs] [242682.814249] btrfs_fiemap+0x78/0xc0 [btrfs] [242682.814501] do_vfs_ioctl+0x2db/0xa50 [242682.814519] __x64_sys_ioctl+0x6a/0xc0 [242682.814531] do_syscall_64+0x4a/0x110 [242682.814544] entry_SYSCALL_64_after_hwframe+0x76/0x7e [242682.814556] RIP: 0033:0x7efff595e71b It tries to join the current transaction, but it can't because the transaction is in the TRANS_STATE_COMMIT_DOING state, so join_transaction() returns -EBUSY to start_transaction() and makes it wait for the current transaction to complete. And while it's waiting for the transaction to complete, it's holding an extent range locked in the same inode that task B is operating, which causes a deadlock between these 3 tasks. The extent range for the inode was locked at the start of the fiemap operation, early at extent_fiemap(). In short these tasks deadlock because: 1) Task A is waiting for task B to release its transaction handle; 2) Task B is waiting to lock an extent range for an inode while holding a transaction handle open; 3) Task C is waiting for the current transaction to complete (for task A to finish the transaction commit) while holding the extent range for the inode locked, so task B can't progress and release its transaction handle. This results in an ABBA deadlock involving transaction commits and extent locks. Extent locks are higher level locks, like inode VFS locks, and should always be acquired before joining or starting a transaction, but recently commit 2206265f41e9 ("btrfs: remove code duplication in ordered extent finishing") accidentally changed btrfs_finish_one_ordered() to do the transaction join before locking the extent range. Fix this by making sure that btrfs_finish_one_ordered() always locks the extent before joining a transaction and add an explicit comment about the need for this order. Fixes: 2206265f41e9 ("btrfs: remove code duplication in ordered extent finishing") Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fdad1adee1a33..659a5940a5b2b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3063,6 +3063,19 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) goto out; } + /* + * If it's a COW write we need to lock the extent range as we will be + * inserting/replacing file extent items and unpinning an extent map. + * This must be taken before joining a transaction, as it's a higher + * level lock (like the inode's VFS lock), otherwise we can run into an + * ABBA deadlock with other tasks (transactions work like a lock, + * depending on their current state). + */ + if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + clear_bits |= EXTENT_LOCKED; + lock_extent(io_tree, start, end, &cached_state); + } + if (freespace_inode) trans = btrfs_join_transaction_spacecache(root); else @@ -3099,9 +3112,6 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) goto out; } - clear_bits |= EXTENT_LOCKED; - lock_extent(io_tree, start, end, &cached_state); - if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { From b188ad7791899da8afe937e439e3086ffddd84a8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 Nov 2024 15:38:27 +0000 Subject: [PATCH 102/366] btrfs: sysfs: advertise experimental features only if CONFIG_BTRFS_EXPERIMENTAL=y We are advertising experimental features through sysfs if CONFIG_BTRFS_DEBUG is set, without looking if CONFIG_BTRFS_EXPERIMENTAL is set. This is wrong as it will result in reporting experimental features as supported when CONFIG_BTRFS_EXPERIMENTAL is not set but CONFIG_BTRFS_DEBUG is set. Fix this by checking for CONFIG_BTRFS_EXPERIMENTAL instead of CONFIG_BTRFS_DEBUG. Fixes: 67cd3f221769 ("btrfs: split out CONFIG_BTRFS_EXPERIMENTAL from CONFIG_BTRFS_DEBUG") Reviewed-by: Neal Gompa Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index b843308e2bc6f..fdcbf650ac31f 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -295,7 +295,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(simple_quota, SIMPLE_QUOTA); #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED); #endif -#ifdef CONFIG_BTRFS_DEBUG +#ifdef CONFIG_BTRFS_EXPERIMENTAL /* Remove once support for extent tree v2 is feature complete */ BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2); /* Remove once support for raid stripe tree is feature complete. */ @@ -329,7 +329,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_PTR(zoned), #endif -#ifdef CONFIG_BTRFS_DEBUG +#ifdef CONFIG_BTRFS_EXPERIMENTAL BTRFS_FEAT_ATTR_PTR(extent_tree_v2), BTRFS_FEAT_ATTR_PTR(raid_stripe_tree), #endif From aaa55faa2495320e44bc643a917c701f2cc89ee7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Nov 2024 18:04:22 +0100 Subject: [PATCH 103/366] ALSA: seq: ump: Fix seq port updates per FB info notify update_port_infos() is called when a UMP FB Info update notification is received, and this function is supposed to update the attributes of the corresponding sequencer port. However, the function had a few issues and it brought to the incorrect states. Namely: - It tried to get a wrong sequencer info for the update without correcting the port number with the group-offset 1 - The loop exited immediately when a sequencer port isn't present; this ended up with the truncation if a sequencer port in the middle goes away This patch addresses those bugs. Fixes: 4a16a3af0571 ("ALSA: seq: ump: Handle FB info update") Link: https://patch.msgid.link/20241128170423.23351-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index e5d3f4d206bf6..e956f17f37928 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -257,12 +257,12 @@ static void update_port_infos(struct seq_ump_client *client) continue; old->addr.client = client->seq_client; - old->addr.port = i; + old->addr.port = ump_group_to_seq_port(i); err = snd_seq_kernel_client_ctl(client->seq_client, SNDRV_SEQ_IOCTL_GET_PORT_INFO, old); if (err < 0) - return; + continue; fill_port_info(new, client, &client->ump->groups[i]); if (old->capability == new->capability && !strcmp(old->name, new->name)) @@ -271,7 +271,7 @@ static void update_port_infos(struct seq_ump_client *client) SNDRV_SEQ_IOCTL_SET_PORT_INFO, new); if (err < 0) - return; + continue; /* notify to system port */ snd_seq_system_client_ev_port_change(client->seq_client, i); } From 3978d53df7236f0a517c2abeb43ddf6ac162cdd8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:42 +0100 Subject: [PATCH 104/366] ALSA: ump: Don't open legacy substream for an inactive group When a UMP Group is inactive, we shouldn't allow users to access it via the legacy MIDI access. Add the group active flag check and return -ENODEV if it's inactive. Link: https://patch.msgid.link/20241129094546.32119-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/ump.c b/sound/core/ump.c index 6d0aac6c763fe..9f9ad106107f7 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1087,6 +1087,8 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) guard(mutex)(&ump->open_mutex); if (ump->legacy_substreams[dir][group]) return -EBUSY; + if (!ump->groups[group].active) + return -ENODEV; if (dir == SNDRV_RAWMIDI_STREAM_OUTPUT) { if (!ump->legacy_out_opens) { err = snd_rawmidi_kernel_open(&ump->core, 0, From e29e504e7890b9ee438ca6370d0180d607c473f9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:43 +0100 Subject: [PATCH 105/366] ALSA: ump: Indicate the inactive group in legacy substream names Since the legacy rawmidi has no proper way to know the inactive group, indicate it in the rawmidi substream names with "[Inactive]" suffix when the corresponding UMP group is inactive. Link: https://patch.msgid.link/20241129094546.32119-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 9f9ad106107f7..7b00a957e5489 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1256,8 +1256,9 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, name = ump->groups[idx].name; if (!*name) name = ump->info.name; - snprintf(s->name, sizeof(s->name), "Group %d (%.16s)", - idx + 1, name); + snprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", + idx + 1, name, + ump->groups[idx].active ? "" : " [Inactive]"); } } From edad3f9519fcacb926d0e3f3217aecaf628a593f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:44 +0100 Subject: [PATCH 106/366] ALSA: ump: Update legacy substream names upon FB info update The legacy rawmidi substreams should be updated when UMP FB Info or UMP FB Name are received, too. Link: https://patch.msgid.link/20241129094546.32119-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 7b00a957e5489..d2b810eb84bcb 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -37,6 +37,7 @@ static int process_legacy_output(struct snd_ump_endpoint *ump, u32 *buffer, int count); static void process_legacy_input(struct snd_ump_endpoint *ump, const u32 *src, int words); +static void update_legacy_names(struct snd_ump_endpoint *ump); #else static inline int process_legacy_output(struct snd_ump_endpoint *ump, u32 *buffer, int count) @@ -47,6 +48,9 @@ static inline void process_legacy_input(struct snd_ump_endpoint *ump, const u32 *src, int words) { } +static inline void update_legacy_names(struct snd_ump_endpoint *ump) +{ +} #endif static const struct snd_rawmidi_global_ops snd_ump_rawmidi_ops = { @@ -861,6 +865,7 @@ static int ump_handle_fb_info_msg(struct snd_ump_endpoint *ump, fill_fb_info(ump, &fb->info, buf); if (ump->parsed) { snd_ump_update_group_attrs(ump); + update_legacy_names(ump); seq_notify_fb_change(ump, fb); } } @@ -893,6 +898,7 @@ static int ump_handle_fb_name_msg(struct snd_ump_endpoint *ump, /* notify the FB name update to sequencer, too */ if (ret > 0 && ump->parsed) { snd_ump_update_group_attrs(ump); + update_legacy_names(ump); seq_notify_fb_change(ump, fb); } return ret; @@ -1262,6 +1268,14 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, } } +static void update_legacy_names(struct snd_ump_endpoint *ump) +{ + struct snd_rawmidi *rmidi = ump->legacy_rmidi; + + fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); + fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); +} + int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, char *id, int device) { @@ -1298,10 +1312,7 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, rmidi->ops = &snd_ump_legacy_ops; rmidi->private_data = ump; ump->legacy_rmidi = rmidi; - if (input) - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); - if (output) - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); + update_legacy_names(ump); ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id); return 0; From 947c4012f8f03a8bb946beb6e5294d5e32817d67 Mon Sep 17 00:00:00 2001 From: bo liu Date: Fri, 29 Nov 2024 09:44:41 +0800 Subject: [PATCH 107/366] ALSA: hda/conexant: fix Z60MR100 startup pop issue When Z60MR100 startup, speaker will output a pop. To fix this issue, we mute codec by init verbs in bios when system startup, and set GPIO to low to unmute codec in codec driver when it loaded . [ white space fixes and compile warning fix by tiwai ] Signed-off-by: bo liu Link: https://patch.msgid.link/20241129014441.437205-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 2e9f817b948eb..538c37a78a56f 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -307,6 +307,7 @@ enum { CXT_FIXUP_HP_MIC_NO_PRESENCE, CXT_PINCFG_SWS_JS201D, CXT_PINCFG_TOP_SPEAKER, + CXT_FIXUP_HP_A_U, }; /* for hda_fixup_thinkpad_acpi() */ @@ -774,6 +775,18 @@ static void cxt_setup_mute_led(struct hda_codec *codec, } } +static void cxt_setup_gpio_unmute(struct hda_codec *codec, + unsigned int gpio_mute_mask) +{ + if (gpio_mute_mask) { + // set gpio data to 0. + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 0); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK, gpio_mute_mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION, gpio_mute_mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_STICKY_MASK, 0); + } +} + static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -788,6 +801,15 @@ static void cxt_fixup_hp_zbook_mute_led(struct hda_codec *codec, cxt_setup_mute_led(codec, 0x10, 0x20); } +static void cxt_fixup_hp_a_u(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + // Init vers in BIOS mute the spk/hp by set gpio high to avoid pop noise, + // so need to unmute once by clearing the gpio data when runs into the system. + if (action == HDA_FIXUP_ACT_INIT) + cxt_setup_gpio_unmute(codec, 0x2); +} + /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { { 0x16, 0x042140ff }, /* HP (seq# overridden) */ @@ -998,6 +1020,10 @@ static const struct hda_fixup cxt_fixups[] = { { } }, }, + [CXT_FIXUP_HP_A_U] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_a_u, + }, }; static const struct hda_quirk cxt5045_fixups[] = { @@ -1072,6 +1098,7 @@ static const struct hda_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0252, "MBX-Z60MR100", CXT_FIXUP_HP_A_U), SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), @@ -1117,6 +1144,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, { .id = CXT_PINCFG_TOP_SPEAKER, .name = "sirius-top-speaker" }, + { .id = CXT_FIXUP_HP_A_U, .name = "HP-U-support" }, {} }; From 8e00072c31e28e7de1ffd9b28c773a3143aa4167 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 25 Nov 2024 17:07:18 +0800 Subject: [PATCH 108/366] net: enetc: read TSN capabilities from port register, not SI Configuring TSN (Qbv, Qbu, PSFP) capabilities requires access to port registers, which are available to the PSI but not the VSI. Yet, the SI port capability register 0 (PSICAPR0), exposed to both PSIs and VSIs, presents the same capabilities to the VF as to the PF, thus leading the VF driver into thinking it can configure these features. In the case of ENETC_SI_F_QBU, having it set in the VF leads to a crash: root@ls1028ardb:~# tc qdisc add dev eno0vf0 parent root handle 100: \ mqprio num_tc 4 map 0 0 1 1 2 2 3 3 queues 1@0 1@1 1@2 1@3 hw 1 [ 187.290775] Unable to handle kernel paging request at virtual address 0000000000001f00 [ 187.424831] pc : enetc_mm_commit_preemptible_tcs+0x1c4/0x400 [ 187.430518] lr : enetc_mm_commit_preemptible_tcs+0x30c/0x400 [ 187.511140] Call trace: [ 187.513588] enetc_mm_commit_preemptible_tcs+0x1c4/0x400 [ 187.518918] enetc_setup_tc_mqprio+0x180/0x214 [ 187.523374] enetc_vf_setup_tc+0x1c/0x30 [ 187.527306] mqprio_enable_offload+0x144/0x178 [ 187.531766] mqprio_init+0x3ec/0x668 [ 187.535351] qdisc_create+0x15c/0x488 [ 187.539023] tc_modify_qdisc+0x398/0x73c [ 187.542958] rtnetlink_rcv_msg+0x128/0x378 [ 187.547064] netlink_rcv_skb+0x60/0x130 [ 187.550910] rtnetlink_rcv+0x18/0x24 [ 187.554492] netlink_unicast+0x300/0x36c [ 187.558425] netlink_sendmsg+0x1a8/0x420 [ 187.606759] ---[ end trace 0000000000000000 ]--- while the other TSN features in the VF are harmless, because the net_device_ops used for the VF driver do not expose entry points for these other features. These capability bits are in the process of being defeatured from the SI registers. We should read them from the port capability register, where they are also present, and which is naturally only exposed to the PF. The change to blame (relevant for stable backports) is the one where this started being a problem, aka when the kernel started to crash due to the wrong capability seen by the VF driver. Fixes: 827145392a4a ("net: enetc: only commit preemptible TCs to hardware when MM TX is active") Reported-by: Wei Fang Signed-off-by: Vladimir Oltean Reviewed-by: Frank Li Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 9 --------- .../net/ethernet/freescale/enetc/enetc_hw.h | 6 +++--- .../net/ethernet/freescale/enetc/enetc_pf.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 35634c516e266..bece220535a1a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1756,15 +1756,6 @@ void enetc_get_si_caps(struct enetc_si *si) rss = enetc_rd(hw, ENETC_SIRSSCAPR); si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(rss); } - - if (val & ENETC_SIPCAPR0_QBV) - si->hw_features |= ENETC_SI_F_QBV; - - if (val & ENETC_SIPCAPR0_QBU) - si->hw_features |= ENETC_SI_F_QBU; - - if (val & ENETC_SIPCAPR0_PSFP) - si->hw_features |= ENETC_SI_F_PSFP; } EXPORT_SYMBOL_GPL(enetc_get_si_caps); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 7c3285584f8a5..55ba949230ffd 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -23,10 +23,7 @@ #define ENETC_SICTR0 0x18 #define ENETC_SICTR1 0x1c #define ENETC_SIPCAPR0 0x20 -#define ENETC_SIPCAPR0_PSFP BIT(9) #define ENETC_SIPCAPR0_RSS BIT(8) -#define ENETC_SIPCAPR0_QBV BIT(4) -#define ENETC_SIPCAPR0_QBU BIT(3) #define ENETC_SIPCAPR0_RFS BIT(2) #define ENETC_SIPCAPR1 0x24 #define ENETC_SITGTGR 0x30 @@ -194,6 +191,9 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PCAPR0 0x0900 #define ENETC_PCAPR0_RXBDR(val) ((val) >> 24) #define ENETC_PCAPR0_TXBDR(val) (((val) >> 16) & 0xff) +#define ENETC_PCAPR0_PSFP BIT(9) +#define ENETC_PCAPR0_QBV BIT(4) +#define ENETC_PCAPR0_QBU BIT(3) #define ENETC_PCAPR1 0x0904 #define ENETC_PSICFGR0(n) (0x0940 + (n) * 0xc) /* n = SI index */ #define ENETC_PSICFGR0_SET_TXBDR(val) ((val) & 0xff) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index c47b4a743d93b..203862ec11142 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -409,6 +409,23 @@ static void enetc_port_assign_rfs_entries(struct enetc_si *si) enetc_port_wr(hw, ENETC_PRFSMR, ENETC_PRFSMR_RFSE); } +static void enetc_port_get_caps(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + u32 val; + + val = enetc_port_rd(hw, ENETC_PCAPR0); + + if (val & ENETC_PCAPR0_QBV) + si->hw_features |= ENETC_SI_F_QBV; + + if (val & ENETC_PCAPR0_QBU) + si->hw_features |= ENETC_SI_F_QBU; + + if (val & ENETC_PCAPR0_PSFP) + si->hw_features |= ENETC_SI_F_PSFP; +} + static void enetc_port_si_configure(struct enetc_si *si) { struct enetc_pf *pf = enetc_si_priv(si); @@ -416,6 +433,8 @@ static void enetc_port_si_configure(struct enetc_si *si) int num_rings, i; u32 val; + enetc_port_get_caps(si); + val = enetc_port_rd(hw, ENETC_PCAPR0); num_rings = min(ENETC_PCAPR0_RXBDR(val), ENETC_PCAPR0_TXBDR(val)); From b2420b8c81ec674552d00c55d46245e5c184b260 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 25 Nov 2024 17:07:19 +0800 Subject: [PATCH 109/366] net: enetc: Do not configure preemptible TCs if SIs do not support Both ENETC PF and VF drivers share enetc_setup_tc_mqprio() to configure MQPRIO. And enetc_setup_tc_mqprio() calls enetc_change_preemptible_tcs() to configure preemptible TCs. However, only PF is able to configure preemptible TCs. Because only PF has related registers, while VF does not have these registers. So for VF, its hw->port pointer is NULL. Therefore, VF will access an invalid pointer when accessing a non-existent register, which will cause a crash issue. The simplified log is as follows. root@ls1028ardb:~# tc qdisc add dev eno0vf0 parent root handle 100: \ mqprio num_tc 4 map 0 0 1 1 2 2 3 3 queues 1@0 1@1 1@2 1@3 hw 1 [ 187.290775] Unable to handle kernel paging request at virtual address 0000000000001f00 [ 187.424831] pc : enetc_mm_commit_preemptible_tcs+0x1c4/0x400 [ 187.430518] lr : enetc_mm_commit_preemptible_tcs+0x30c/0x400 [ 187.511140] Call trace: [ 187.513588] enetc_mm_commit_preemptible_tcs+0x1c4/0x400 [ 187.518918] enetc_setup_tc_mqprio+0x180/0x214 [ 187.523374] enetc_vf_setup_tc+0x1c/0x30 [ 187.527306] mqprio_enable_offload+0x144/0x178 [ 187.531766] mqprio_init+0x3ec/0x668 [ 187.535351] qdisc_create+0x15c/0x488 [ 187.539023] tc_modify_qdisc+0x398/0x73c [ 187.542958] rtnetlink_rcv_msg+0x128/0x378 [ 187.547064] netlink_rcv_skb+0x60/0x130 [ 187.550910] rtnetlink_rcv+0x18/0x24 [ 187.554492] netlink_unicast+0x300/0x36c [ 187.558425] netlink_sendmsg+0x1a8/0x420 [ 187.606759] ---[ end trace 0000000000000000 ]--- In addition, some PFs also do not support configuring preemptible TCs, such as eno1 and eno3 on LS1028A. It won't crash like it does for VFs, but we should prevent these PFs from accessing these unimplemented registers. Fixes: 827145392a4a ("net: enetc: only commit preemptible TCs to hardware when MM TX is active") Signed-off-by: Wei Fang Suggested-by: Vladimir Oltean Reviewed-by: Frank Li Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index bece220535a1a..535969fa0fdbe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -29,6 +29,9 @@ EXPORT_SYMBOL_GPL(enetc_port_mac_wr); static void enetc_change_preemptible_tcs(struct enetc_ndev_priv *priv, u8 preemptible_tcs) { + if (!(priv->si->hw_features & ENETC_SI_F_QBU)) + return; + priv->preemptible_tcs = preemptible_tcs; enetc_mm_commit_preemptible_tcs(priv); } From 4f9d674377d090e38d93360bd4df21b67534d622 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Nov 2024 09:04:16 +0100 Subject: [PATCH 110/366] ALSA: usb-audio: Notify xrun for low-latency mode The low-latency mode of USB-audio driver uses a similar approach like the implicit feedback mode but it has an explicit queuing at the trigger start time. The difference is, however, that no packet will be handled any longer after all queued packets are handled but no enough data is fed. In the case of implicit feedback mode, the capture-side packet handling triggers the re-queuing, and this checks the XRUN. OTOH, in the low-latency mode, it just stops without XRUN notification unless any new action is taken from user-space via ack callback. For example, when you stop the stream in aplay, no XRUN is reported. This patch adds the XRUN check at the packet complete callback in the case all pending URBs are exhausted. Strictly speaking, this state doesn't match really with XRUN; in theory the application may queue immediately after this happens. But such behavior is only for 1-period configuration, which the USB-audio driver doesn't support. So we may conclude that this situation leads certainly to XRUN. A caveat is that the XRUN should be triggered only for the PCM RUNNING state, and not during DRAINING. This additional state check is put in notify_xrun(), too. Fixes: d5f871f89e21 ("ALSA: usb-audio: Improved lowlatency playback support") Reported-by: Leonard Crestez Link: https://lore.kernel.org/25d5b0d8-4efd-4630-9d33-7a9e3fa9dc2b@gmail.com Link: https://patch.msgid.link/20241128080446.1181-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 568099467dbbc..a29f28eb7d0c6 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -403,10 +403,15 @@ static int prepare_inbound_urb(struct snd_usb_endpoint *ep, static void notify_xrun(struct snd_usb_endpoint *ep) { struct snd_usb_substream *data_subs; + struct snd_pcm_substream *psubs; data_subs = READ_ONCE(ep->data_subs); - if (data_subs && data_subs->pcm_substream) - snd_pcm_stop_xrun(data_subs->pcm_substream); + if (!data_subs) + return; + psubs = data_subs->pcm_substream; + if (psubs && psubs->runtime && + psubs->runtime->state == SNDRV_PCM_STATE_RUNNING) + snd_pcm_stop_xrun(psubs); } static struct snd_usb_packet_info * @@ -562,7 +567,10 @@ static void snd_complete_urb(struct urb *urb) push_back_to_ready_list(ep, ctx); clear_bit(ctx->index, &ep->active_mask); snd_usb_queue_pending_output_urbs(ep, false); - atomic_dec(&ep->submitted_urbs); /* decrement at last */ + /* decrement at last, and check xrun */ + if (atomic_dec_and_test(&ep->submitted_urbs) && + !snd_usb_endpoint_implicit_feedback_sink(ep)) + notify_xrun(ep); return; } From 9b5f8ee43e48c25fbe1a10163ec04343d750acd0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Nov 2024 11:49:38 +0100 Subject: [PATCH 111/366] ALSA: sh: Use standard helper for buffer accesses The SH DAC audio driver uses the kmalloc'ed buffer as the main PCM buffer, and the data is transferred via hrtimer callbacks manually from there to the hardware. Meanwhile, some of its code are written as if the buffer is on iomem and use the special helpers for the iomem (e.g. copy_from_iter_toio() or memset_io()). Those are rather useless and the standard helpers should be used. Similarly, the PCM mmap callback is set to a special one with snd_pcm_lib_mmap_iomem, but this is also nonsense, because SH architecture doesn't support this function, hence it leads just to NULL -- the fallback to the standard helper. This patch replaces those special setups with the standard ones. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411281337.I4M07b7i-lkp@intel.com/ Link: https://patch.msgid.link/20241128104939.13755-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/sh/sh_dac_audio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index e7b80328f0ef9..a4d07438ad64b 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -163,7 +163,7 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - if (copy_from_iter_toio(chip->data_buffer + pos, src, count)) + if (copy_from_iter(chip->data_buffer + pos, src, count) != count) return -EFAULT; chip->buffer_end = chip->data_buffer + pos + count; @@ -182,7 +182,7 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - memset_io(chip->data_buffer + pos, 0, count); + memset(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; if (chip->empty) { @@ -211,7 +211,6 @@ static const struct snd_pcm_ops snd_sh_dac_pcm_ops = { .pointer = snd_sh_dac_pcm_pointer, .copy = snd_sh_dac_pcm_copy, .fill_silence = snd_sh_dac_pcm_silence, - .mmap = snd_pcm_lib_mmap_iomem, }; static int snd_sh_dac_pcm(struct snd_sh_dac *chip, int device) From 8d355b56f29533e0b0db0d9a2de8bdc05ab27375 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 28 Nov 2024 14:27:16 +0100 Subject: [PATCH 112/366] selftests/hid: fix kfunc inclusions with newer bpftool bpftool now embeds the kfuncs definitions directly in the generated vmlinux.h This is great, but because the selftests dir might be compiled with HID_BPF disabled, we have no guarantees to be able to compile the sources with the generated kfuncs. If we have the kfuncs, because we have the `__not_used` hack, the newly defined kfuncs do not match the ones from vmlinux.h and things go wrong. Prevent vmlinux.h to define its kfuncs and also add the missing `__weak` symbols for our custom kfuncs definitions Link: https://patch.msgid.link/20241128-fix-new-bpftool-v1-1-c9abdf94a719@kernel.org Signed-off-by: Benjamin Tissoires --- .../selftests/hid/progs/hid_bpf_helpers.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index e5db897586bbf..531228b849dae 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -22,6 +22,9 @@ #define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used #define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used +/* do not define kfunc through vmlinux.h as this messes up our custom hack */ +#define BPF_NO_KFUNC_PROTOTYPES + #include "vmlinux.h" #undef hid_bpf_ctx @@ -91,31 +94,31 @@ struct hid_bpf_ops { /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, - const size_t __sz) __ksym; -extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; -extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; + const size_t __sz) __weak __ksym; +extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __weak __ksym; +extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __weak __ksym; extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *data, size_t buf__sz, enum hid_report_type type, - enum hid_class_request reqtype) __ksym; + enum hid_class_request reqtype) __weak __ksym; extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, - __u8 *buf, size_t buf__sz) __ksym; + __u8 *buf, size_t buf__sz) __weak __ksym; extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; /* bpf_wq implementation */ extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, int (callback_fn)(void *map, int *key, void *wq), - unsigned int flags__k, void *aux__ign) __ksym; + unsigned int flags__k, void *aux__ign) __weak __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) From ed67f2a913a4f0fc505db29805c41dd07d3cb356 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 15 Nov 2024 15:46:13 +0000 Subject: [PATCH 113/366] btrfs: don't loop for nowait writes when checking for cross references When checking for delayed refs when verifying if there are cross references for a data extent, we stop if the path has nowait set and we can't try lock the delayed ref head's mutex, returning -EAGAIN with the goal of making a write fallback to a blocking context. However we ignore the -EAGAIN at btrfs_cross_ref_exist() when check_delayed_ref() returns it, and keep looping instead of immediately returning the -EAGAIN to the caller. Fix this by not looping if we get -EAGAIN and we have a nowait path. Fixes: 26ce91144631 ("btrfs: make can_nocow_extent nowait compatible") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 412e318e4a22d..bd09dd3ad1a0a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2422,7 +2422,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, goto out; ret = check_delayed_ref(root, path, objectid, offset, bytenr); - } while (ret == -EAGAIN); + } while (ret == -EAGAIN && !path->nowait); out: btrfs_release_path(path); From 3ed51857a50f530ac7a1482e069dfbd1298558d4 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Fri, 25 Oct 2024 12:55:53 +0800 Subject: [PATCH 114/366] btrfs: add a sanity check for btrfs root in btrfs_search_slot() Syzbot reports a null-ptr-deref in btrfs_search_slot(). The reproducer is using rescue=ibadroots, and the extent tree root is corrupted thus the extent tree is NULL. When scrub tries to search the extent tree to gather the needed extent info, btrfs_search_slot() doesn't check if the target root is NULL or not, resulting the null-ptr-deref. Add sanity check for btrfs root before using it in btrfs_search_slot(). Reported-by: syzbot+3030e17bd57a73d39bd7@syzkaller.appspotmail.com Fixes: 42437a6386ff ("btrfs: introduce mount option rescue=ignorebadroots") Link: https://syzkaller.appspot.com/bug?extid=3030e17bd57a73d39bd7 CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Tested-by: syzbot+3030e17bd57a73d39bd7@syzkaller.appspotmail.com Signed-off-by: Lizhi Xu Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 148648ea1c8b9..693dc27ffb897 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2046,7 +2046,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info; struct extent_buffer *b; int slot; int ret; @@ -2059,6 +2059,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, int min_write_lock_level; int prev_cmp; + if (!root) + return -EINVAL; + + fs_info = root->fs_info; might_sleep(); lowest_level = p->lowest_level; From 7c4e39f9d2af4abaf82ca0e315d1fd340456620f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 15 Nov 2024 11:29:21 +0000 Subject: [PATCH 115/366] btrfs: ref-verify: fix use-after-free after invalid ref action At btrfs_ref_tree_mod() after we successfully inserted the new ref entry (local variable 'ref') into the respective block entry's rbtree (local variable 'be'), if we find an unexpected action of BTRFS_DROP_DELAYED_REF, we error out and free the ref entry without removing it from the block entry's rbtree. Then in the error path of btrfs_ref_tree_mod() we call btrfs_free_ref_cache(), which iterates over all block entries and then calls free_block_entry() for each one, and there we will trigger a use-after-free when we are called against the block entry to which we added the freed ref entry to its rbtree, since the rbtree still points to the block entry, as we didn't remove it from the rbtree before freeing it in the error path at btrfs_ref_tree_mod(). Fix this by removing the new ref entry from the rbtree before freeing it. Syzbot report this with the following stack traces: BTRFS error (device loop0 state EA): Ref action 2, root 5, ref_root 0, parent 8564736, owner 0, offset 0, num_refs 18446744073709551615 __btrfs_mod_ref+0x7dd/0xac0 fs/btrfs/extent-tree.c:2523 update_ref_for_cow+0x9cd/0x11f0 fs/btrfs/ctree.c:512 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_insert_empty_items+0x9c/0x1a0 fs/btrfs/ctree.c:4314 btrfs_insert_empty_item fs/btrfs/ctree.h:669 [inline] btrfs_insert_orphan_item+0x1f1/0x320 fs/btrfs/orphan.c:23 btrfs_orphan_add+0x6d/0x1a0 fs/btrfs/inode.c:3482 btrfs_unlink+0x267/0x350 fs/btrfs/inode.c:4293 vfs_unlink+0x365/0x650 fs/namei.c:4469 do_unlinkat+0x4ae/0x830 fs/namei.c:4533 __do_sys_unlinkat fs/namei.c:4576 [inline] __se_sys_unlinkat fs/namei.c:4569 [inline] __x64_sys_unlinkat+0xcc/0xf0 fs/namei.c:4569 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f BTRFS error (device loop0 state EA): Ref action 1, root 5, ref_root 5, parent 0, owner 260, offset 0, num_refs 1 __btrfs_mod_ref+0x76b/0xac0 fs/btrfs/extent-tree.c:2521 update_ref_for_cow+0x96a/0x11f0 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:411 __btrfs_update_delayed_inode+0x1e7/0xb90 fs/btrfs/delayed-inode.c:1030 btrfs_update_delayed_inode fs/btrfs/delayed-inode.c:1114 [inline] __btrfs_commit_inode_delayed_items+0x2318/0x24a0 fs/btrfs/delayed-inode.c:1137 __btrfs_run_delayed_items+0x213/0x490 fs/btrfs/delayed-inode.c:1171 btrfs_commit_transaction+0x8a8/0x3740 fs/btrfs/transaction.c:2313 prepare_to_relocate+0x3c4/0x4c0 fs/btrfs/relocation.c:3586 relocate_block_group+0x16c/0xd40 fs/btrfs/relocation.c:3611 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4081 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3377 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4161 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4538 BTRFS error (device loop0 state EA): Ref action 2, root 5, ref_root 0, parent 8564736, owner 0, offset 0, num_refs 18446744073709551615 __btrfs_mod_ref+0x7dd/0xac0 fs/btrfs/extent-tree.c:2523 update_ref_for_cow+0x9cd/0x11f0 fs/btrfs/ctree.c:512 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:411 __btrfs_update_delayed_inode+0x1e7/0xb90 fs/btrfs/delayed-inode.c:1030 btrfs_update_delayed_inode fs/btrfs/delayed-inode.c:1114 [inline] __btrfs_commit_inode_delayed_items+0x2318/0x24a0 fs/btrfs/delayed-inode.c:1137 __btrfs_run_delayed_items+0x213/0x490 fs/btrfs/delayed-inode.c:1171 btrfs_commit_transaction+0x8a8/0x3740 fs/btrfs/transaction.c:2313 prepare_to_relocate+0x3c4/0x4c0 fs/btrfs/relocation.c:3586 relocate_block_group+0x16c/0xd40 fs/btrfs/relocation.c:3611 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4081 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3377 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4161 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4538 ================================================================== BUG: KASAN: slab-use-after-free in rb_first+0x69/0x70 lib/rbtree.c:473 Read of size 8 at addr ffff888042d1af38 by task syz.0.0/5329 CPU: 0 UID: 0 PID: 5329 Comm: syz.0.0 Not tainted 6.12.0-rc7-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 rb_first+0x69/0x70 lib/rbtree.c:473 free_block_entry+0x78/0x230 fs/btrfs/ref-verify.c:248 btrfs_free_ref_cache+0xa3/0x100 fs/btrfs/ref-verify.c:917 btrfs_ref_tree_mod+0x139f/0x15e0 fs/btrfs/ref-verify.c:898 btrfs_free_extent+0x33c/0x380 fs/btrfs/extent-tree.c:3544 __btrfs_mod_ref+0x7dd/0xac0 fs/btrfs/extent-tree.c:2523 update_ref_for_cow+0x9cd/0x11f0 fs/btrfs/ctree.c:512 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:411 __btrfs_update_delayed_inode+0x1e7/0xb90 fs/btrfs/delayed-inode.c:1030 btrfs_update_delayed_inode fs/btrfs/delayed-inode.c:1114 [inline] __btrfs_commit_inode_delayed_items+0x2318/0x24a0 fs/btrfs/delayed-inode.c:1137 __btrfs_run_delayed_items+0x213/0x490 fs/btrfs/delayed-inode.c:1171 btrfs_commit_transaction+0x8a8/0x3740 fs/btrfs/transaction.c:2313 prepare_to_relocate+0x3c4/0x4c0 fs/btrfs/relocation.c:3586 relocate_block_group+0x16c/0xd40 fs/btrfs/relocation.c:3611 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4081 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3377 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4161 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4538 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3673 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f996df7e719 RSP: 002b:00007f996ede7038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f996e135f80 RCX: 00007f996df7e719 RDX: 0000000020000180 RSI: 00000000c4009420 RDI: 0000000000000004 RBP: 00007f996dff139e R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f996e135f80 R15: 00007fff79f32e68 Allocated by task 5329: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:257 [inline] __kmalloc_cache_noprof+0x19c/0x2c0 mm/slub.c:4295 kmalloc_noprof include/linux/slab.h:878 [inline] kzalloc_noprof include/linux/slab.h:1014 [inline] btrfs_ref_tree_mod+0x264/0x15e0 fs/btrfs/ref-verify.c:701 btrfs_free_extent+0x33c/0x380 fs/btrfs/extent-tree.c:3544 __btrfs_mod_ref+0x7dd/0xac0 fs/btrfs/extent-tree.c:2523 update_ref_for_cow+0x9cd/0x11f0 fs/btrfs/ctree.c:512 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:411 __btrfs_update_delayed_inode+0x1e7/0xb90 fs/btrfs/delayed-inode.c:1030 btrfs_update_delayed_inode fs/btrfs/delayed-inode.c:1114 [inline] __btrfs_commit_inode_delayed_items+0x2318/0x24a0 fs/btrfs/delayed-inode.c:1137 __btrfs_run_delayed_items+0x213/0x490 fs/btrfs/delayed-inode.c:1171 btrfs_commit_transaction+0x8a8/0x3740 fs/btrfs/transaction.c:2313 prepare_to_relocate+0x3c4/0x4c0 fs/btrfs/relocation.c:3586 relocate_block_group+0x16c/0xd40 fs/btrfs/relocation.c:3611 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4081 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3377 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4161 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4538 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3673 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5329: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:230 [inline] slab_free_hook mm/slub.c:2342 [inline] slab_free mm/slub.c:4579 [inline] kfree+0x1a0/0x440 mm/slub.c:4727 btrfs_ref_tree_mod+0x136c/0x15e0 btrfs_free_extent+0x33c/0x380 fs/btrfs/extent-tree.c:3544 __btrfs_mod_ref+0x7dd/0xac0 fs/btrfs/extent-tree.c:2523 update_ref_for_cow+0x9cd/0x11f0 fs/btrfs/ctree.c:512 btrfs_force_cow_block+0x9f6/0x1da0 fs/btrfs/ctree.c:594 btrfs_cow_block+0x35e/0xa40 fs/btrfs/ctree.c:754 btrfs_search_slot+0xbdd/0x30d0 fs/btrfs/ctree.c:2116 btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:411 __btrfs_update_delayed_inode+0x1e7/0xb90 fs/btrfs/delayed-inode.c:1030 btrfs_update_delayed_inode fs/btrfs/delayed-inode.c:1114 [inline] __btrfs_commit_inode_delayed_items+0x2318/0x24a0 fs/btrfs/delayed-inode.c:1137 __btrfs_run_delayed_items+0x213/0x490 fs/btrfs/delayed-inode.c:1171 btrfs_commit_transaction+0x8a8/0x3740 fs/btrfs/transaction.c:2313 prepare_to_relocate+0x3c4/0x4c0 fs/btrfs/relocation.c:3586 relocate_block_group+0x16c/0xd40 fs/btrfs/relocation.c:3611 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4081 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3377 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4161 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4538 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3673 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff888042d1af00 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 56 bytes inside of freed 64-byte region [ffff888042d1af00, ffff888042d1af40) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x42d1a anon flags: 0x4fff00000000000(node=1|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 04fff00000000000 ffff88801ac418c0 0000000000000000 dead000000000001 raw: 0000000000000000 0000000000200020 00000001f5000000 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x52c40(GFP_NOFS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP), pid 5055, tgid 5055 (dhcpcd-run-hook), ts 40377240074, free_ts 40376848335 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1541 prep_new_page mm/page_alloc.c:1549 [inline] get_page_from_freelist+0x3649/0x3790 mm/page_alloc.c:3459 __alloc_pages_noprof+0x292/0x710 mm/page_alloc.c:4735 alloc_pages_mpol_noprof+0x3e8/0x680 mm/mempolicy.c:2265 alloc_slab_page+0x6a/0x140 mm/slub.c:2412 allocate_slab+0x5a/0x2f0 mm/slub.c:2578 new_slab mm/slub.c:2631 [inline] ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3818 __slab_alloc+0x58/0xa0 mm/slub.c:3908 __slab_alloc_node mm/slub.c:3961 [inline] slab_alloc_node mm/slub.c:4122 [inline] __do_kmalloc_node mm/slub.c:4263 [inline] __kmalloc_noprof+0x25a/0x400 mm/slub.c:4276 kmalloc_noprof include/linux/slab.h:882 [inline] kzalloc_noprof include/linux/slab.h:1014 [inline] tomoyo_encode2 security/tomoyo/realpath.c:45 [inline] tomoyo_encode+0x26f/0x540 security/tomoyo/realpath.c:80 tomoyo_realpath_from_path+0x59e/0x5e0 security/tomoyo/realpath.c:283 tomoyo_get_realpath security/tomoyo/file.c:151 [inline] tomoyo_check_open_permission+0x255/0x500 security/tomoyo/file.c:771 security_file_open+0x777/0x990 security/security.c:3109 do_dentry_open+0x369/0x1460 fs/open.c:945 vfs_open+0x3e/0x330 fs/open.c:1088 do_open fs/namei.c:3774 [inline] path_openat+0x2c84/0x3590 fs/namei.c:3933 page last free pid 5055 tgid 5055 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1112 [inline] free_unref_page+0xcfb/0xf20 mm/page_alloc.c:2642 free_pipe_info+0x300/0x390 fs/pipe.c:860 put_pipe_info fs/pipe.c:719 [inline] pipe_release+0x245/0x320 fs/pipe.c:742 __fput+0x23f/0x880 fs/file_table.c:431 __do_sys_close fs/open.c:1567 [inline] __se_sys_close fs/open.c:1552 [inline] __x64_sys_close+0x7f/0x110 fs/open.c:1552 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Memory state around the buggy address: ffff888042d1ae00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff888042d1ae80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc >ffff888042d1af00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff888042d1af80: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc ffff888042d1b000: 00 00 00 00 00 fc fc 00 00 00 00 00 fc fc 00 00 Reported-by: syzbot+7325f164162e200000c1@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/673723eb.050a0220.1324f8.00a8.GAE@google.com/T/#u Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 9522a8b79d22b..2928abf7eb827 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -857,6 +857,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + rb_erase(&ref->node, &be->refs); kfree(ref); kfree(ra); goto out_unlock; From 22d2e48e318564f8c9b09faf03ecb4f03fb44dd5 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 15 Nov 2024 15:49:17 +0000 Subject: [PATCH 116/366] btrfs: fix lockdep warnings on io_uring encoded reads Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to userspace still holding the inode lock, even though we release it once the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to work round this. Reported-by: Johannes Thumshirn 34310c442e17 ("btrfs: add io_uring command for encoded reads (ENCODED_READ ioctl)") Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++++++++ fs/btrfs/locking.h | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1fdeb216bf6c5..f8680e7cc974f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4752,6 +4752,9 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss size_t page_offset; ssize_t ret; + /* The inode lock has already been acquired in btrfs_uring_read_extent. */ + btrfs_lockdep_inode_acquire(inode, i_rwsem); + if (priv->err) { ret = priv->err; goto out; @@ -4860,6 +4863,13 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, * and inode and freeing the allocations. */ + /* + * We're returning to userspace with the inode lock held, and that's + * okay - it'll get unlocked in a worker thread. Call + * btrfs_lockdep_inode_release() to avoid confusing lockdep. + */ + btrfs_lockdep_inode_release(inode, i_rwsem); + return -EIOCBQUEUED; out_fail: diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 46c8be2afab1c..35036b151bf5b 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -128,6 +128,16 @@ enum btrfs_lockdep_trans_states { #define btrfs_lockdep_release(owner, lock) \ rwsem_release(&owner->lock##_map, _THIS_IP_) +/* + * Used to account for the fact that when doing io_uring encoded I/O, we can + * return to userspace with the inode lock still held. + */ +#define btrfs_lockdep_inode_acquire(owner, lock) \ + rwsem_acquire_read(&owner->vfs_inode.lock.dep_map, 0, 0, _THIS_IP_) + +#define btrfs_lockdep_inode_release(owner, lock) \ + rwsem_release(&owner->vfs_inode.lock.dep_map, _THIS_IP_) + /* * Macros for the transaction states wait events, similar to the generic wait * event macros. From 537a2525eaf76ea9b0dca62b994500d8670b39d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 27 Nov 2024 11:17:46 +0100 Subject: [PATCH 117/366] tools: Override makefile ARCH variable if defined, but empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a number of tools (bpftool, selftests), that require a "bootstrap" build. Here, a bootstrap build is a build host variant of a target. E.g., assume that you're performing a bpftool cross-build on x86 to riscv, a bootstrap build would then be an x86 variant of bpftool. The typical way to perform the host build variant, is to pass "ARCH=" in a sub-make. However, if a variable has been set with a command argument, then ordinary assignments in the makefile are ignored. This side-effect results in that ARCH, and variables depending on ARCH are not set. Workaround by overriding ARCH to the host arch, if ARCH is empty. Fixes: 8859b0da5aac ("tools/bpftool: Fix cross-build") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Tested-by: Alexandre Ghiti Reviewed-by: Jean-Philippe Brucker Reviewed-by: Namhyung Kim Reviewed-by: Toke Høiland-Jørgensen Acked-by: Quentin Monnet Acked-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20241127101748.165693-1-bjorn@kernel.org --- tools/scripts/Makefile.arch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index f6a50f06dfc45..eabfe9f411d91 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -7,8 +7,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/) -ifndef ARCH -ARCH := $(HOSTARCH) +ifeq ($(strip $(ARCH)),) +override ARCH := $(HOSTARCH) endif SRCARCH := $(ARCH) From ed990c07af70d286f5736021c6e25d8df6f2f7b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 30 Nov 2024 10:00:08 +0100 Subject: [PATCH 118/366] ALSA: ump: Shut up truncated string warning The recent change for the legacy substream name update brought a compile warning for some compilers due to the nature of snprintf(). Use scnprintf() to shut up the warning since the truncation is intentional. Fixes: e29e504e7890 ("ALSA: ump: Indicate the inactive group in legacy substream names") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411300103.FrGuTAYp-lkp@intel.com/ Link: https://patch.msgid.link/20241130090009.19849-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index d2b810eb84bcb..fe4d39ae11593 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1262,9 +1262,9 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, name = ump->groups[idx].name; if (!*name) name = ump->info.name; - snprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", - idx + 1, name, - ump->groups[idx].active ? "" : " [Inactive]"); + scnprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", + idx + 1, name, + ump->groups[idx].active ? "" : " [Inactive]"); } } From 0a4cc4accf00b49b4728bb7639cb90a6a5b674e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Nov 2024 09:30:39 +0000 Subject: [PATCH 119/366] tcp: populate XPS related fields of timewait sockets syzbot reported that netdev_core_pick_tx() was reading an uninitialized field [1]. This is indeed hapening for timewait sockets after recent commits. We can copy the original established socket sk_tx_queue_mapping and sk_rx_queue_mapping fields, instead of adding more checks in fast paths. As a bonus, packets will use the same transmit queue than prior ones, this potentially can avoid reordering. [1] BUG: KMSAN: uninit-value in netdev_pick_tx+0x5c7/0x1550 netdev_pick_tx+0x5c7/0x1550 netdev_core_pick_tx+0x1d2/0x4a0 net/core/dev.c:4312 __dev_queue_xmit+0x128a/0x57d0 net/core/dev.c:4394 dev_queue_xmit include/linux/netdevice.h:3168 [inline] neigh_hh_output include/net/neighbour.h:523 [inline] neigh_output include/net/neighbour.h:537 [inline] ip_finish_output2+0x187c/0x1b70 net/ipv4/ip_output.c:236 __ip_finish_output+0x287/0x810 ip_finish_output+0x4b/0x600 net/ipv4/ip_output.c:324 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip_output+0x15f/0x3f0 net/ipv4/ip_output.c:434 dst_output include/net/dst.h:450 [inline] ip_local_out net/ipv4/ip_output.c:130 [inline] ip_send_skb net/ipv4/ip_output.c:1505 [inline] ip_push_pending_frames+0x444/0x570 net/ipv4/ip_output.c:1525 ip_send_unicast_reply+0x18c1/0x1b30 net/ipv4/ip_output.c:1672 tcp_v4_send_reset+0x238d/0x2a40 net/ipv4/tcp_ipv4.c:910 tcp_v4_rcv+0x48f8/0x5750 net/ipv4/tcp_ipv4.c:2431 ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x336/0x500 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:578 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:628 [inline] ip_sublist_rcv+0x15f3/0x17f0 net/ipv4/ip_input.c:636 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:670 __netif_receive_skb_list_ptype net/core/dev.c:5715 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5762 __netif_receive_skb_list net/core/dev.c:5814 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:5905 gro_normal_list include/net/gro.h:515 [inline] napi_complete_done+0x3d4/0x810 net/core/dev.c:6256 virtqueue_napi_complete drivers/net/virtio_net.c:758 [inline] virtnet_poll+0x5d80/0x6bf0 drivers/net/virtio_net.c:3013 __napi_poll+0xe7/0x980 net/core/dev.c:6877 napi_poll net/core/dev.c:6946 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:7068 handle_softirqs+0x1a0/0x7c0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0x68/0x180 kernel/softirq.c:655 irq_exit_rcu+0x12/0x20 kernel/softirq.c:671 common_interrupt+0x97/0xb0 arch/x86/kernel/irq.c:278 asm_common_interrupt+0x2b/0x40 arch/x86/include/asm/idtentry.h:693 __preempt_count_sub arch/x86/include/asm/preempt.h:84 [inline] kmsan_virt_addr_valid arch/x86/include/asm/kmsan.h:95 [inline] virt_to_page_or_null+0xfb/0x150 mm/kmsan/shadow.c:75 kmsan_get_metadata+0x13e/0x1c0 mm/kmsan/shadow.c:141 kmsan_get_shadow_origin_ptr+0x4d/0xb0 mm/kmsan/shadow.c:102 get_shadow_origin_ptr mm/kmsan/instrumentation.c:38 [inline] __msan_metadata_ptr_for_store_4+0x27/0x40 mm/kmsan/instrumentation.c:93 rcu_preempt_read_enter kernel/rcu/tree_plugin.h:390 [inline] __rcu_read_lock+0x46/0x70 kernel/rcu/tree_plugin.h:413 rcu_read_lock include/linux/rcupdate.h:847 [inline] batadv_nc_purge_orig_hash net/batman-adv/network-coding.c:408 [inline] batadv_nc_worker+0x114/0x19e0 net/batman-adv/network-coding.c:719 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xae0/0x1c40 kernel/workqueue.c:3310 worker_thread+0xea7/0x14f0 kernel/workqueue.c:3391 kthread+0x3e2/0x540 kernel/kthread.c:389 ret_from_fork+0x6d/0x90 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Uninit was created at: __alloc_pages_noprof+0x9a7/0xe00 mm/page_alloc.c:4774 alloc_pages_mpol_noprof+0x299/0x990 mm/mempolicy.c:2265 alloc_pages_noprof+0x1bf/0x1e0 mm/mempolicy.c:2344 alloc_slab_page mm/slub.c:2412 [inline] allocate_slab+0x320/0x12e0 mm/slub.c:2578 new_slab mm/slub.c:2631 [inline] ___slab_alloc+0x12ef/0x35e0 mm/slub.c:3818 __slab_alloc mm/slub.c:3908 [inline] __slab_alloc_node mm/slub.c:3961 [inline] slab_alloc_node mm/slub.c:4122 [inline] kmem_cache_alloc_noprof+0x57a/0xb20 mm/slub.c:4141 inet_twsk_alloc+0x11f/0x9d0 net/ipv4/inet_timewait_sock.c:188 tcp_time_wait+0x83/0xf50 net/ipv4/tcp_minisocks.c:309 tcp_rcv_state_process+0x145a/0x49d0 tcp_v4_do_rcv+0xbf9/0x11a0 net/ipv4/tcp_ipv4.c:1939 tcp_v4_rcv+0x51df/0x5750 net/ipv4/tcp_ipv4.c:2351 ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x336/0x500 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:578 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:628 [inline] ip_sublist_rcv+0x15f3/0x17f0 net/ipv4/ip_input.c:636 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:670 __netif_receive_skb_list_ptype net/core/dev.c:5715 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5762 __netif_receive_skb_list net/core/dev.c:5814 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:5905 gro_normal_list include/net/gro.h:515 [inline] napi_complete_done+0x3d4/0x810 net/core/dev.c:6256 virtqueue_napi_complete drivers/net/virtio_net.c:758 [inline] virtnet_poll+0x5d80/0x6bf0 drivers/net/virtio_net.c:3013 __napi_poll+0xe7/0x980 net/core/dev.c:6877 napi_poll net/core/dev.c:6946 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:7068 handle_softirqs+0x1a0/0x7c0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0x68/0x180 kernel/softirq.c:655 irq_exit_rcu+0x12/0x20 kernel/softirq.c:671 common_interrupt+0x97/0xb0 arch/x86/kernel/irq.c:278 asm_common_interrupt+0x2b/0x40 arch/x86/include/asm/idtentry.h:693 CPU: 0 UID: 0 PID: 3962 Comm: kworker/u8:18 Not tainted 6.12.0-syzkaller-09073-g9f16d5e6f220 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: bat_events batadv_nc_worker Fixes: 79636038d37e ("ipv4: tcp: give socket pointer to control skbs") Fixes: 507a96737d99 ("ipv6: tcp: give socket pointer to control skbs") Reported-by: syzbot+8b0959fc16551d55896b@syzkaller.appspotmail.com Link: https://lore.kernel.org/netdev/674442bd.050a0220.1cc393.0072.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241125093039.3095790-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_timewait_sock.h | 2 ++ net/ipv4/tcp_minisocks.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index beb533a0e8809..62c0a7e65d6bd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -45,6 +45,8 @@ struct inet_timewait_sock { #define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_tx_queue_mapping __tw_common.skc_tx_queue_mapping +#define tw_rx_queue_mapping __tw_common.skc_rx_queue_mapping #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot #define tw_net __tw_common.skc_net diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bb1fe1ba867ac..7121d8573928c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -326,6 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; tw->tw_txhash = sk->sk_txhash; + tw->tw_tx_queue_mapping = sk->sk_tx_queue_mapping; +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING + tw->tw_rx_queue_mapping = sk->sk_rx_queue_mapping; +#endif #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); From 98337d7c87577ded71114f6976edb70a163e27bc Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Mon, 25 Nov 2024 10:59:54 +0000 Subject: [PATCH 120/366] ptp: Add error handling for adjfine callback in ptp_clock_adjtime ptp_clock_adjtime sets ptp->dialed_frequency even when adjfine callback returns an error. This causes subsequent reads to return an incorrect value. Fix this by adding error check before ptp->dialed_frequency is set. Fixes: 39a8cbd9ca05 ("ptp: remember the adjusted frequency") Signed-off-by: Ajay Kaher Acked-by: Richard Cochran Link: https://patch.msgid.link/20241125105954.1509971-1-ajay.kaher@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index c56cd0f63909a..77a36e7bddd54 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -150,7 +150,8 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; err = ops->adjfine(ops, tx->freq); - ptp->dialed_frequency = tx->freq; + if (!err) + ptp->dialed_frequency = tx->freq; } else if (tx->modes & ADJ_OFFSET) { if (ops->adjphase) { s32 max_phase_adj = ops->getmaxphase(ops); From 1596a135e3180c92e42dd1fbcad321f4fb3e3b17 Mon Sep 17 00:00:00 2001 From: Martin Ottens Date: Mon, 25 Nov 2024 18:46:07 +0100 Subject: [PATCH 121/366] net/sched: tbf: correct backlog statistic for GSO packets When the length of a GSO packet in the tbf qdisc is larger than the burst size configured the packet will be segmented by the tbf_segment function. Whenever this function is used to enqueue SKBs, the backlog statistic of the tbf is not increased correctly. This can lead to underflows of the 'backlog' byte-statistic value when these packets are dequeued from tbf. Reproduce the bug: Ensure that the sender machine has GSO enabled. Configured the tbf on the outgoing interface of the machine as follows (burstsize = 1 MTU): $ tc qdisc add dev root handle 1: tbf rate 50Mbit burst 1514 latency 50ms Send bulk TCP traffic out via this interface, e.g., by running an iPerf3 client on this machine. Check the qdisc statistics: $ tc -s qdisc show dev The 'backlog' byte-statistic has incorrect values while traffic is transferred, e.g., high values due to u32 underflows. When the transfer is stopped, the value is != 0, which should never happen. This patch fixes this bug by updating the statistics correctly, even if single SKBs of a GSO SKB cannot be enqueued. Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") Signed-off-by: Martin Ottens Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241125174608.1484356-1-martin.ottens@fau.de Signed-off-by: Jakub Kicinski --- net/sched/sch_tbf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index f1d09183ae632..dc26b22d53c73 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -208,7 +208,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); - unsigned int len = 0, prev_len = qdisc_pkt_len(skb); + unsigned int len = 0, prev_len = qdisc_pkt_len(skb), seg_len; int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); @@ -219,21 +219,27 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, nb = 0; skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); - qdisc_skb_cb(segs)->pkt_len = segs->len; - len += segs->len; + seg_len = segs->len; + qdisc_skb_cb(segs)->pkt_len = seg_len; ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); } else { nb++; + len += seg_len; } } sch->q.qlen += nb; - if (nb > 1) + sch->qstats.backlog += len; + if (nb > 0) { qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); - consume_skb(skb); - return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; + consume_skb(skb); + return NET_XMIT_SUCCESS; + } + + kfree_skb(skb); + return NET_XMIT_DROP; } static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, From eedcad2f2a371786f8a32d0046794103dadcedf3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Nov 2024 14:59:11 +0000 Subject: [PATCH 122/366] selinux: use sk_to_full_sk() in selinux_ip_output() In blamed commit, TCP started to attach timewait sockets to some skbs. syzbot reported that selinux_ip_output() was not expecting them yet. Note that using sk_to_full_sk() is still allowing the following sk_listener() check to work as before. BUG: KASAN: slab-out-of-bounds in selinux_sock security/selinux/include/objsec.h:207 [inline] BUG: KASAN: slab-out-of-bounds in selinux_ip_output+0x1e0/0x1f0 security/selinux/hooks.c:5761 Read of size 8 at addr ffff88804e86e758 by task syz-executor347/5894 CPU: 0 UID: 0 PID: 5894 Comm: syz-executor347 Not tainted 6.12.0-syzkaller-05480-gfcc79e1714e8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 selinux_sock security/selinux/include/objsec.h:207 [inline] selinux_ip_output+0x1e0/0x1f0 security/selinux/hooks.c:5761 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xbb/0x200 net/netfilter/core.c:626 nf_hook+0x386/0x6d0 include/linux/netfilter.h:269 __ip_local_out+0x339/0x640 net/ipv4/ip_output.c:119 ip_local_out net/ipv4/ip_output.c:128 [inline] ip_send_skb net/ipv4/ip_output.c:1505 [inline] ip_push_pending_frames+0xa0/0x5b0 net/ipv4/ip_output.c:1525 ip_send_unicast_reply+0xd0e/0x1650 net/ipv4/ip_output.c:1672 tcp_v4_send_ack+0x976/0x13f0 net/ipv4/tcp_ipv4.c:1024 tcp_v4_timewait_ack net/ipv4/tcp_ipv4.c:1077 [inline] tcp_v4_rcv+0x2f96/0x4390 net/ipv4/tcp_ipv4.c:2428 ip_protocol_deliver_rcu+0xba/0x4c0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x316/0x570 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_local_deliver+0x18e/0x1f0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_rcv_finish net/ipv4/ip_input.c:447 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_rcv+0x2c3/0x5d0 net/ipv4/ip_input.c:567 __netif_receive_skb_one_core+0x199/0x1e0 net/core/dev.c:5672 __netif_receive_skb+0x1d/0x160 net/core/dev.c:5785 process_backlog+0x443/0x15f0 net/core/dev.c:6117 __napi_poll.constprop.0+0xb7/0x550 net/core/dev.c:6877 napi_poll net/core/dev.c:6946 [inline] net_rx_action+0xa94/0x1010 net/core/dev.c:7068 handle_softirqs+0x213/0x8f0 kernel/softirq.c:554 do_softirq kernel/softirq.c:455 [inline] do_softirq+0xb2/0xf0 kernel/softirq.c:442 __local_bh_enable_ip+0x100/0x120 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline] __dev_queue_xmit+0x8af/0x43e0 net/core/dev.c:4461 dev_queue_xmit include/linux/netdevice.h:3168 [inline] neigh_hh_output include/net/neighbour.h:523 [inline] neigh_output include/net/neighbour.h:537 [inline] ip_finish_output2+0xc6c/0x2150 net/ipv4/ip_output.c:236 __ip_finish_output net/ipv4/ip_output.c:314 [inline] __ip_finish_output+0x49e/0x950 net/ipv4/ip_output.c:296 ip_finish_output+0x35/0x380 net/ipv4/ip_output.c:324 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip_output+0x13b/0x2a0 net/ipv4/ip_output.c:434 dst_output include/net/dst.h:450 [inline] ip_local_out+0x33e/0x4a0 net/ipv4/ip_output.c:130 __ip_queue_xmit+0x777/0x1970 net/ipv4/ip_output.c:536 __tcp_transmit_skb+0x2b39/0x3df0 net/ipv4/tcp_output.c:1466 tcp_transmit_skb net/ipv4/tcp_output.c:1484 [inline] tcp_write_xmit+0x12b1/0x8560 net/ipv4/tcp_output.c:2827 __tcp_push_pending_frames+0xaf/0x390 net/ipv4/tcp_output.c:3010 tcp_send_fin+0x154/0xc70 net/ipv4/tcp_output.c:3616 __tcp_close+0x96b/0xff0 net/ipv4/tcp.c:3130 tcp_close+0x28/0x120 net/ipv4/tcp.c:3221 inet_release+0x13c/0x280 net/ipv4/af_inet.c:435 __sock_release net/socket.c:640 [inline] sock_release+0x8e/0x1d0 net/socket.c:668 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:640 sock_close+0x1c/0x30 net/socket.c:1408 __fput+0x3f8/0xb60 fs/file_table.c:450 __fput_sync+0xa1/0xc0 fs/file_table.c:535 __do_sys_close fs/open.c:1550 [inline] __se_sys_close fs/open.c:1535 [inline] __x64_sys_close+0x86/0x100 fs/open.c:1535 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f6814c9ae10 Code: ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 80 3d b1 e2 07 00 00 74 17 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c RSP: 002b:00007fffb2389758 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f6814c9ae10 RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000000f4240 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000202 R12: 00007fffb23897b0 R13: 00000000000141c3 R14: 00007fffb238977c R15: 00007fffb2389790 Fixes: 79636038d37e ("ipv4: tcp: give socket pointer to control skbs") Reported-by: syzbot+2d9f5f948c31dcb7745e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/6745e1a2.050a0220.1286eb.001c.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Acked-by: Paul Moore Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241126145911.4187198-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5a08f94e0940..366c87a40bd15 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5738,7 +5738,7 @@ static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb, /* we do this in the LOCAL_OUT path and not the POST_ROUTING path * because we want to make sure we apply the necessary labeling * before IPsec is applied so we can leverage AH protection */ - sk = skb->sk; + sk = sk_to_full_sk(skb->sk); if (sk) { struct sk_security_struct *sksec; From 16ed454515a4fdcb8050a399bdcba6961aca089d Mon Sep 17 00:00:00 2001 From: Vyshnav Ajith Date: Fri, 22 Nov 2024 04:18:27 +0530 Subject: [PATCH 123/366] docs: net: bareudp: fix spelling and grammar mistakes The BareUDP documentation had several grammar and spelling mistakes, making it harder to read. This patch fixes those errors to improve clarity and readability for developers. Signed-off-by: Vyshnav Ajith Link: https://patch.msgid.link/20241121224827.12293-1-puthen1977@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/bareudp.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst index b9d04ee6dac14..621cb9575c8f7 100644 --- a/Documentation/networking/bareudp.rst +++ b/Documentation/networking/bareudp.rst @@ -6,16 +6,17 @@ Bare UDP Tunnelling Module Documentation There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. -MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. +MPLSoUDP (https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel module provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. Special Handling ---------------- + The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. -MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). +The MPLS protocol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC with a flag called multiproto mode. @@ -52,7 +53,7 @@ be enabled explicitly with the "multiproto" flag. 3) Device Usage The bareudp device could be used along with OVS or flower filter in TC. -The OVS or TC flower layer must set the tunnel information in SKB dst field before -sending packet buffer to the bareudp device for transmission. On reception the -bareudp device extracts and stores the tunnel information in SKB dst field before +The OVS or TC flower layer must set the tunnel information in the SKB dst field before +sending the packet buffer to the bareudp device for transmission. On reception, the +bareUDP device extracts and stores the tunnel information in the SKB dst field before passing the packet buffer to the network stack. From b9653d19e556c6afd035602927a93d100a0d7644 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Nov 2024 14:43:44 +0000 Subject: [PATCH 124/366] net: hsr: avoid potential out-of-bound access in fill_frame_info() syzbot is able to feed a packet with 14 bytes, pretending it is a vlan one. Since fill_frame_info() is relying on skb->mac_len already, extend the check to cover this case. BUG: KMSAN: uninit-value in fill_frame_info net/hsr/hsr_forward.c:709 [inline] BUG: KMSAN: uninit-value in hsr_forward_skb+0x9ee/0x3b10 net/hsr/hsr_forward.c:724 fill_frame_info net/hsr/hsr_forward.c:709 [inline] hsr_forward_skb+0x9ee/0x3b10 net/hsr/hsr_forward.c:724 hsr_dev_xmit+0x2f0/0x350 net/hsr/hsr_device.c:235 __netdev_start_xmit include/linux/netdevice.h:5002 [inline] netdev_start_xmit include/linux/netdevice.h:5011 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3606 __dev_queue_xmit+0x366a/0x57d0 net/core/dev.c:4434 dev_queue_xmit include/linux/netdevice.h:3168 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3146 [inline] packet_sendmsg+0x91ae/0xa6f0 net/packet/af_packet.c:3178 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:726 __sys_sendto+0x594/0x750 net/socket.c:2197 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2200 x64_sys_call+0x346a/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:4091 [inline] slab_alloc_node mm/slub.c:4134 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4186 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678 alloc_skb include/linux/skbuff.h:1323 [inline] alloc_skb_with_frags+0xc8/0xd00 net/core/skbuff.c:6612 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2881 packet_alloc_skb net/packet/af_packet.c:2995 [inline] packet_snd net/packet/af_packet.c:3089 [inline] packet_sendmsg+0x74c6/0xa6f0 net/packet/af_packet.c:3178 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:726 __sys_sendto+0x594/0x750 net/socket.c:2197 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2200 x64_sys_call+0x346a/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") Reported-by: syzbot+671e2853f9851d039551@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6745dc7f.050a0220.21d33d.0018.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: WingMan Kwok Cc: Murali Karicheri Cc: MD Danish Anwar Cc: Jiri Pirko Cc: George McCollister Link: https://patch.msgid.link/20241126144344.4177332-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index aa6acebc7c1ef..87bb3a91598ee 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -700,6 +700,8 @@ static int fill_frame_info(struct hsr_frame_info *frame, frame->is_vlan = true; if (frame->is_vlan) { + if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr)) + return -EINVAL; vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; } From be75cda92a65a13db242117d674cd5584477a168 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 27 Nov 2024 15:58:29 -0700 Subject: [PATCH 125/366] bnxt_en: ethtool: Supply ntuple rss context action Commit 2f4f9fe5bf5f ("bnxt_en: Support adding ntuple rules on RSS contexts") added support for redirecting to an RSS context as an ntuple rule action. However, it forgot to update the ETHTOOL_GRXCLSRULE codepath. This caused `ethtool -n` to always report the action as "Action: Direct to queue 0" which is wrong. Fix by teaching bnxt driver to report the RSS context when applicable. Fixes: 2f4f9fe5bf5f ("bnxt_en: Support adding ntuple rules on RSS contexts") Reviewed-by: Michael Chan Signed-off-by: Daniel Xu Link: https://patch.msgid.link/2e884ae39e08dc5123be7c170a6089cefe6a78f7.1732748253.git.dxu@dxuuu.xyz Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f1f6bb328a55b..d87681d711067 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1187,10 +1187,14 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd) } } - if (fltr->base.flags & BNXT_ACT_DROP) + if (fltr->base.flags & BNXT_ACT_DROP) { fs->ring_cookie = RX_CLS_FLOW_DISC; - else + } else if (fltr->base.flags & BNXT_ACT_RSS_CTX) { + fs->flow_type |= FLOW_RSS; + cmd->rss_context = fltr->base.fw_vnic_id; + } else { fs->ring_cookie = fltr->base.rxq; + } rc = 0; fltr_err: From 7078d43b2374daedf254662053e924c938eb86b3 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 27 Nov 2024 15:58:30 -0700 Subject: [PATCH 126/366] selftests: drv-net: rss_ctx: Add test for ntuple rule Extend the rss_ctx test suite to test that an ntuple action that redirects to an RSS context contains that information in `ethtool -n`. Otherwise the output from ethtool is highly deceiving. This test helps ensure drivers are compliant with the API. Signed-off-by: Daniel Xu Link: https://patch.msgid.link/759870e430b7c93ecaae6e448f30a47284c59637.1732748253.git.dxu@dxuuu.xyz Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 0b49ce7ae6784..ca8a7edff3dda 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -3,7 +3,8 @@ import datetime import random -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt +import re +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx @@ -96,6 +97,13 @@ def _send_traffic_check(cfg, port, name, params): f"traffic on inactive queues ({name}): " + str(cnts)) +def _ntuple_rule_check(cfg, rule_id, ctx_id): + """Check that ntuple rule references RSS context ID""" + text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout + pattern = f"RSS Context (ID: )?{ctx_id}" + ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule") + + def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" @@ -459,6 +467,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + _ntuple_rule_check(cfg, ntuple, ctx_id) + for i in range(ctx_cnt): _send_traffic_check(cfg, ports[i], f"context {i}", { 'target': (2+i*2, 3+i*2), From c44daa7e3c73229f7ac74985acb8c7fb909c4e0a Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Wed, 27 Nov 2024 12:08:50 +0800 Subject: [PATCH 127/366] net: Fix icmp host relookup triggering ip_rt_bug arp link failure may trigger ip_rt_bug while xfrm enabled, call trace is: WARNING: CPU: 0 PID: 0 at net/ipv4/route.c:1241 ip_rt_bug+0x14/0x20 Modules linked in: CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc6-00077-g2e1b3cc9d7f7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:ip_rt_bug+0x14/0x20 Call Trace: ip_send_skb+0x14/0x40 __icmp_send+0x42d/0x6a0 ipv4_link_failure+0xe2/0x1d0 arp_error_report+0x3c/0x50 neigh_invalidate+0x8d/0x100 neigh_timer_handler+0x2e1/0x330 call_timer_fn+0x21/0x120 __run_timer_base.part.0+0x1c9/0x270 run_timer_softirq+0x4c/0x80 handle_softirqs+0xac/0x280 irq_exit_rcu+0x62/0x80 sysvec_apic_timer_interrupt+0x77/0x90 The script below reproduces this scenario: ip xfrm policy add src 0.0.0.0/0 dst 0.0.0.0/0 \ dir out priority 0 ptype main flag localok icmp ip l a veth1 type veth ip a a 192.168.141.111/24 dev veth0 ip l s veth0 up ping 192.168.141.155 -c 1 icmp_route_lookup() create input routes for locally generated packets while xfrm relookup ICMP traffic.Then it will set input route (dst->out = ip_rt_bug) to skb for DESTUNREACH. For ICMP err triggered by locally generated packets, dst->dev of output route is loopback. Generally, xfrm relookup verification is not required on loopback interfaces (net.ipv4.conf.lo.disable_xfrm = 1). Skip icmp relookup for locally generated packets to fix it. Fixes: 8b7817f3a959 ("[IPSEC]: Add ICMP host relookup support") Signed-off-by: Dong Chenchen Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241127040850.1513135-1-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4f088fa1c2f2c..963a89ae9c26e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -517,6 +517,9 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, if (!IS_ERR(dst)) { if (rt != rt2) return rt; + if (inet_addr_type_dev_table(net, route_lookup_dev, + fl4->daddr) == RTN_LOCAL) + return rt; } else if (PTR_ERR(dst) == -EPERM) { rt = NULL; } else { From a7de2b873f3dbcda02d504536f1ec6dc50e3f6c4 Mon Sep 17 00:00:00 2001 From: Marie Ramlow Date: Sat, 30 Nov 2024 17:52:40 +0100 Subject: [PATCH 128/366] ALSA: usb-audio: add mixer mapping for Corsair HS80 The Corsair HS80 RGB Wireless is a USB headset with a mic and a sidetone feature. It has the same quirk as the Virtuoso series. This labels the mixers appropriately, so applications don't move the sidetone volume when they actually intend to move the main headset volume. Signed-off-by: Marie Ramlow cc: Link: https://patch.msgid.link/20241130165240.17838-1-me@nycode.dev Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 23260aa1919d3..0e9b5431a47f2 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -621,6 +621,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x1b1c, 0x0a42), .map = corsair_virtuoso_map, }, + { + /* Corsair HS80 RGB Wireless (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a6a), + .map = corsair_virtuoso_map, + }, + { + /* Corsair HS80 RGB Wireless (wireless mode) */ + .id = USB_ID(0x1b1c, 0x0a6b), + .map = corsair_virtuoso_map, + }, { /* Gigabyte TRX40 Aorus Master (rear panel + front mic) */ .id = USB_ID(0x0414, 0xa001), .map = aorus_master_alc1220vb_map, From 3a83f7baf1346aca885cb83cb888e835fef7c472 Mon Sep 17 00:00:00 2001 From: Nazar Bilinskyi Date: Sun, 1 Dec 2024 01:16:31 +0200 Subject: [PATCH 129/366] ALSA: hda/realtek: Enable mute and micmute LED on HP ProBook 430 G8 HP ProBook 430 G8 has a mute and micmute LEDs that can be made to work using quirk ALC236_FIXUP_HP_GPIO_LED. Enable already existing quirk. Signed-off-by: Nazar Bilinskyi Cc: Link: https://patch.msgid.link/20241130231631.8929-1-nbilinskyi@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2bf5c512ebaf3..877c5d20ac774 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10340,6 +10340,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f1, "HP ProBook 630 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From a0cd2b265fe3f675c121df848aec2e79ff7c100f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Nov 2024 13:08:18 +0300 Subject: [PATCH 130/366] ALSA: hda/tas2781: Fix error code tas2781_read_acpi() Return an error code if acpi_get_subsystem_id() fails. Don't return success. Fixes: 4e7035a75da9 ("ALSA: hda/tas2781: Add speaker id check for ASUS projects") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/ef773f8a-a61d-478b-9e81-41a38a75c77b@stanley.mountain Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 45cfb5a6f309c..8ec03bda85f32 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -143,6 +143,7 @@ static int tas2781_read_acpi(struct tasdevice_priv *p, const char *hid) sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev)); if (IS_ERR(sub)) { dev_err(p->dev, "Failed to get SUBSYS ID.\n"); + ret = PTR_ERR(sub); goto err; } /* Speaker id was needed for ASUS projects. */ From 9cdc6423acb49055efb444ecd895d853a70ef931 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 29 Nov 2024 11:13:47 +0200 Subject: [PATCH 131/366] memblock: allow zero threshold in validate_numa_converage() Currently memblock validate_numa_converage() returns false negative when threshold set to zero. Make the check if the memory size with invalid node ID is greater than the threshold exclusive to fix that. Link: https://lore.kernel.org/all/Z0mIDBD4KLyxyOCm@kernel.org/ Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 0389ce5cd281e..095c18b5c430d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -735,7 +735,7 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) /** * memblock_validate_numa_coverage - check if amount of memory with * no node ID assigned is less than a threshold - * @threshold_bytes: maximal number of pages that can have unassigned node + * @threshold_bytes: maximal memory size that can have unassigned node * ID (in bytes). * * A buggy firmware may report memory that does not belong to any node. @@ -755,7 +755,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt nr_pages += end_pfn - start_pfn; } - if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) { + if ((nr_pages << PAGE_SHIFT) > threshold_bytes) { mem_size_mb = memblock_phys_mem_size() >> 20; pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); From 180bbad698641873120a48857bb3b9f3166bf684 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 1 Dec 2024 09:27:02 +0000 Subject: [PATCH 132/366] arch_numa: Restore nid checks before registering a memblock with a node Commit 767507654c22 ("arch_numa: switch over to numa_memblks") significantly cleaned up the NUMA registration code, but also dropped a significant check that was refusing to accept to configure a memblock with an invalid nid. On "quality hardware" such as my ThunderX machine, this results in a kernel that dies immediately: [ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x431f0a10] [ 0.000000] Linux version 6.12.0-00013-g8920d74cf8db (maz@valley-girl) (gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40) #3872 SMP PREEMPT Wed Nov 27 15:25:49 GMT 2024 [ 0.000000] KASLR disabled due to lack of seed [ 0.000000] Machine model: Cavium ThunderX CN88XX board [ 0.000000] efi: EFI v2.4 by American Megatrends [ 0.000000] efi: ESRT=0xffce0ff18 SMBIOS 3.0=0xfffb0000 ACPI 2.0=0xffec60000 MEMRESERVE=0xffc905d98 [ 0.000000] esrt: Reserving ESRT space from 0x0000000ffce0ff18 to 0x0000000ffce0ff50. [ 0.000000] earlycon: pl11 at MMIO 0x000087e024000000 (options '115200n8') [ 0.000000] printk: legacy bootconsole [pl11] enabled [ 0.000000] NODE_DATA(0) allocated [mem 0xff6754580-0xff67566bf] [ 0.000000] Unable to handle kernel paging request at virtual address 0000000000001d40 [ 0.000000] Mem abort info: [ 0.000000] ESR = 0x0000000096000004 [ 0.000000] EC = 0x25: DABT (current EL), IL = 32 bits [ 0.000000] SET = 0, FnV = 0 [ 0.000000] EA = 0, S1PTW = 0 [ 0.000000] FSC = 0x04: level 0 translation fault [ 0.000000] Data abort info: [ 0.000000] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 0.000000] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 0.000000] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 0.000000] [0000000000001d40] user address but active_mm is swapper [ 0.000000] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.12.0-00013-g8920d74cf8db #3872 [ 0.000000] Hardware name: Cavium ThunderX CN88XX board (DT) [ 0.000000] pstate: a00000c5 (NzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 0.000000] pc : sparse_init_nid+0x54/0x428 [ 0.000000] lr : sparse_init+0x118/0x240 [ 0.000000] sp : ffff800081da3cb0 [ 0.000000] x29: ffff800081da3cb0 x28: 0000000fedbab10c x27: 0000000000000001 [ 0.000000] x26: 0000000ffee250f8 x25: 0000000000000001 x24: ffff800082102cd0 [ 0.000000] x23: 0000000000000001 x22: 0000000000000000 x21: 00000000001fffff [ 0.000000] x20: 0000000000000001 x19: 0000000000000000 x18: ffffffffffffffff [ 0.000000] x17: 0000000001b00000 x16: 0000000ffd130000 x15: 0000000000000000 [ 0.000000] x14: 00000000003e0000 x13: 00000000000001c8 x12: 0000000000000014 [ 0.000000] x11: ffff800081e82860 x10: ffff8000820fb2c8 x9 : ffff8000820fb490 [ 0.000000] x8 : 0000000000ffed20 x7 : 0000000000000014 x6 : 00000000001fffff [ 0.000000] x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000000 [ 0.000000] x2 : 0000000000000000 x1 : 0000000000000040 x0 : 0000000000000007 [ 0.000000] Call trace: [ 0.000000] sparse_init_nid+0x54/0x428 [ 0.000000] sparse_init+0x118/0x240 [ 0.000000] bootmem_init+0x70/0x1c8 [ 0.000000] setup_arch+0x184/0x270 [ 0.000000] start_kernel+0x74/0x670 [ 0.000000] __primary_switched+0x80/0x90 [ 0.000000] Code: f865d804 d37df060 cb030000 d2800003 (b95d4084) [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- while previous kernel versions were able to recognise how brain-damaged the machine is, and only build a fake node. Use the memblock_validate_numa_coverage() helper to restore some sanity and a "working" system. Fixes: 767507654c22 ("arch_numa: switch over to numa_memblks") Suggested-by: Mike Rapoport Signed-off-by: Marc Zyngier Cc: Catalin Marinas Cc: Will Deacon Cc: Zi Yan Cc: Dan Williams Cc: David Hildenbrand Cc: Andrew Morton Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241201092702.3792845-1-maz@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- drivers/base/arch_numa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index e187016764265..c99f2ab105e5b 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -208,6 +208,10 @@ static int __init numa_register_nodes(void) { int nid; + /* Check the validity of the memblock/node mapping */ + if (!memblock_validate_numa_coverage(0)) + return -EINVAL; + /* Finally register nodes. */ for_each_node_mask(nid, numa_nodes_parsed) { unsigned long start_pfn, end_pfn; From a747e02430dfb3657141f99aa6b09331283fa493 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Nov 2024 19:28:27 +0000 Subject: [PATCH 133/366] ipv6: avoid possible NULL deref in modify_prefix_route() syzbot found a NULL deref [1] in modify_prefix_route(), caused by one fib6_info without a fib6_table pointer set. This can happen for net->ipv6.fib6_null_entry [1] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 UID: 0 PID: 5837 Comm: syz-executor888 Not tainted 6.12.0-syzkaller-09567-g7eef7e306d3c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__lock_acquire+0xe4/0x3c40 kernel/locking/lockdep.c:5089 Code: 08 84 d2 0f 85 15 14 00 00 44 8b 0d ca 98 f5 0e 45 85 c9 0f 84 b4 0e 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 0f 85 96 2c 00 00 49 8b 04 24 48 3d a0 07 7f 93 0f 84 RSP: 0018:ffffc900035d7268 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: 1ffff920006bae5f RDI: 0000000000000030 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 R10: ffffffff90608e17 R11: 0000000000000001 R12: 0000000000000030 R13: ffff888036334880 R14: 0000000000000000 R15: 0000000000000000 FS: 0000555579e90380(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc59cc4278 CR3: 0000000072b54000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5849 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline] _raw_spin_lock_bh+0x33/0x40 kernel/locking/spinlock.c:178 spin_lock_bh include/linux/spinlock.h:356 [inline] modify_prefix_route+0x30b/0x8b0 net/ipv6/addrconf.c:4831 inet6_addr_modify net/ipv6/addrconf.c:4923 [inline] inet6_rtm_newaddr+0x12c7/0x1ab0 net/ipv6/addrconf.c:5055 rtnetlink_rcv_msg+0x3c7/0xea0 net/core/rtnetlink.c:6920 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2541 netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline] netlink_unicast+0x53c/0x7f0 net/netlink/af_netlink.c:1347 netlink_sendmsg+0x8b8/0xd70 net/netlink/af_netlink.c:1891 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg net/socket.c:726 [inline] ____sys_sendmsg+0xaaf/0xc90 net/socket.c:2583 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2637 __sys_sendmsg+0x16e/0x220 net/socket.c:2669 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fd1dcef8b79 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc59cc4378 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fd1dcef8b79 RDX: 0000000000040040 RSI: 0000000020000140 RDI: 0000000000000004 RBP: 00000000000113fd R08: 0000000000000006 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 00007ffc59cc438c R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 Fixes: 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") Reported-by: syzbot+1de74b0794c40c8eb300@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67461f7f.050a0220.1286eb.0021.GAE@google.com/T/#u Signed-off-by: Eric Dumazet CC: Kui-Feng Lee Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c489a1e6aec9a..0e765466d7f79 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4821,7 +4821,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, ifm->ifa_prefixlen, extack); } -static int modify_prefix_route(struct inet6_ifaddr *ifp, +static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, unsigned long expires, u32 flags, bool modify_peer) { @@ -4845,7 +4845,9 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); - } else { + return 0; + } + if (f6i != net->ipv6.fib6_null_entry) { table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); @@ -4858,9 +4860,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, } spin_unlock_bh(&table->tb6_lock); - - fib6_info_release(f6i); } + fib6_info_release(f6i); return 0; } @@ -4939,7 +4940,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, int rc = -ENOENT; if (had_prefixroute) - rc = modify_prefix_route(ifp, expires, flags, false); + rc = modify_prefix_route(net, ifp, expires, flags, false); /* prefix route could have been deleted; if so restore it */ if (rc == -ENOENT) { @@ -4949,7 +4950,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, } if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr)) - rc = modify_prefix_route(ifp, expires, flags, true); + rc = modify_prefix_route(net, ifp, expires, flags, true); if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) { addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len, From e18655cf35a5958fbf4ae9ca3ebf28871a3a1801 Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Tue, 26 Nov 2024 11:41:35 +0530 Subject: [PATCH 134/366] smb: server: Fix building with GCC 15 GCC 15 introduces -Werror=unterminated-string-initialization by default, this results in the following build error fs/smb/server/smb_common.c:21:35: error: initializer-string for array of 'char' is too long [-Werror=unterminated-string-ini tialization] 21 | static const char basechars[43] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%"; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors To this we are replacing char basechars[43] with a character pointer and then using strlen to get the length. Signed-off-by: Brahmajit Das Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 4e6f169fcf83b..f51cd0816b1a9 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -18,8 +18,8 @@ #include "mgmt/share_config.h" /*for shortname implementation */ -static const char basechars[43] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%"; -#define MANGLE_BASE (sizeof(basechars) / sizeof(char) - 1) +static const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%"; +#define MANGLE_BASE (strlen(basechars) - 1) #define MAGIC_CHAR '~' #define PERIOD '.' #define mangle(V) ((char)(basechars[(V) % MANGLE_BASE])) From fc342cf86e2dc4d2edb0fc2ff5e28b6c7845adb9 Mon Sep 17 00:00:00 2001 From: Jordy Zomer Date: Thu, 28 Nov 2024 09:32:45 +0900 Subject: [PATCH 135/366] ksmbd: fix Out-of-Bounds Read in ksmbd_vfs_stream_read An offset from client could be a negative value, It could lead to an out-of-bounds read from the stream_buf. Note that this issue is coming when setting 'vfs objects = streams_xattr parameter' in ksmbd.conf. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Jordy Zomer Signed-off-by: Jordy Zomer Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 416f7df4edef5..7b6a3952f228e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6663,6 +6663,10 @@ int smb2_read(struct ksmbd_work *work) } offset = le64_to_cpu(req->Offset); + if (offset < 0) { + err = -EINVAL; + goto out; + } length = le32_to_cpu(req->Length); mincount = le32_to_cpu(req->MinimumCount); From 313dab082289e460391c82d855430ec8a28ddf81 Mon Sep 17 00:00:00 2001 From: Jordy Zomer Date: Thu, 28 Nov 2024 09:33:25 +0900 Subject: [PATCH 136/366] ksmbd: fix Out-of-Bounds Write in ksmbd_vfs_stream_write An offset from client could be a negative value, It could allows to write data outside the bounds of the allocated buffer. Note that this issue is coming when setting 'vfs objects = streams_xattr parameter' in ksmbd.conf. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Jordy Zomer Signed-off-by: Jordy Zomer Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7b6a3952f228e..23879555880fc 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6882,6 +6882,8 @@ int smb2_write(struct ksmbd_work *work) } offset = le64_to_cpu(req->Offset); + if (offset < 0) + return -EINVAL; length = le32_to_cpu(req->Length); if (req->Channel == SMB2_CHANNEL_RDMA_V1 || From 04c319e05d0b08cc789db7abccce0fcb13dbab16 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Thu, 28 Nov 2024 20:52:09 +0000 Subject: [PATCH 137/366] ASoC: Intel: avs: da7219: Remove suspend_pre() and resume_post() The presence of a plugged jack is not detected after resuming the device if the jack was plugged before the device was suspended. This problem is caused by calling the sound/soc/codecs/da7219-aad.c:da7219_aad_jack_det() function on resume, which forces the jack insertion state to be unplugged. Signed-off-by: Marek Maslanka Link: https://patch.msgid.link/20241128205215.2435485-1-mmaslanka@google.com Reviewed-by: Cezary Rojewski Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/da7219.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c index 93eba4fd27710..76078a7005b07 100644 --- a/sound/soc/intel/avs/boards/da7219.c +++ b/sound/soc/intel/avs/boards/da7219.c @@ -209,21 +209,6 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in return 0; } -static int avs_card_suspend_pre(struct snd_soc_card *card) -{ - struct snd_soc_dai *codec_dai = snd_soc_card_get_codec_dai(card, DA7219_DAI_NAME); - - return snd_soc_component_set_jack(codec_dai->component, NULL, NULL); -} - -static int avs_card_resume_post(struct snd_soc_card *card) -{ - struct snd_soc_dai *codec_dai = snd_soc_card_get_codec_dai(card, DA7219_DAI_NAME); - struct snd_soc_jack *jack = snd_soc_card_get_drvdata(card); - - return snd_soc_component_set_jack(codec_dai->component, jack, NULL); -} - static int avs_da7219_probe(struct platform_device *pdev) { struct snd_soc_dai_link *dai_link; @@ -255,8 +240,6 @@ static int avs_da7219_probe(struct platform_device *pdev) card->name = "avs_da7219"; card->dev = dev; card->owner = THIS_MODULE; - card->suspend_pre = avs_card_suspend_pre; - card->resume_post = avs_card_resume_post; card->dai_link = dai_link; card->num_links = 1; card->controls = card_controls; From 3f1aa0c533d9dd8a835caf9a6824449c463ee7e2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Nov 2024 13:35:06 +0000 Subject: [PATCH 138/366] regmap: Use correct format specifier for logging range errors The register addresses are unsigned ints so we should use %u not %d to log them. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20241127-regmap-test-high-addr-v1-1-74a48a9e0dc5@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 53131a7ede0a6..7ac4dcd15f242 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1052,13 +1052,13 @@ struct regmap *__regmap_init(struct device *dev, /* Sanity check */ if (range_cfg->range_max < range_cfg->range_min) { - dev_err(map->dev, "Invalid range %d: %d < %d\n", i, + dev_err(map->dev, "Invalid range %d: %u < %u\n", i, range_cfg->range_max, range_cfg->range_min); goto err_range; } if (range_cfg->range_max > map->max_register) { - dev_err(map->dev, "Invalid range %d: %d > %d\n", i, + dev_err(map->dev, "Invalid range %d: %u > %u\n", i, range_cfg->range_max, map->max_register); goto err_range; } From 3061e170381af96d1e66799d34264e6414d428a7 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 28 Nov 2024 15:16:23 +0200 Subject: [PATCH 139/366] regmap: detach regmap from dev on regmap_exit At the end of __regmap_init(), if dev is not NULL, regmap_attach_dev() is called, which adds a devres reference to the regmap, to be able to retrieve a dev's regmap by name using dev_get_regmap(). When calling regmap_exit, the opposite does not happen, and the reference is kept until the dev is detached. Add a regmap_detach_dev() function and call it in regmap_exit() to make sure that the devres reference is not kept. Cc: stable@vger.kernel.org Fixes: 72b39f6f2b5a ("regmap: Implement dev_get_regmap()") Signed-off-by: Cosmin Tanislav Rule: add Link: https://lore.kernel.org/stable/20241128130554.362486-1-demonsingur%40gmail.com Link: https://patch.msgid.link/20241128131625.363835-1-demonsingur@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 7ac4dcd15f242..5962ea1230a17 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -598,6 +598,17 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, } EXPORT_SYMBOL_GPL(regmap_attach_dev); +static int dev_get_regmap_match(struct device *dev, void *res, void *data); + +static int regmap_detach_dev(struct device *dev, struct regmap *map) +{ + if (!dev) + return 0; + + return devres_release(dev, dev_get_regmap_release, + dev_get_regmap_match, (void *)map->name); +} + static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus, const struct regmap_config *config) { @@ -1445,6 +1456,7 @@ void regmap_exit(struct regmap *map) { struct regmap_async *async; + regmap_detach_dev(map->dev, map); regcache_exit(map); regmap_debugfs_exit(map); From 984836621aad98802d92c4a3047114cf518074c8 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Thu, 28 Nov 2024 16:38:17 +0800 Subject: [PATCH 140/366] spi: mpc52xx: Add cancel_work_sync before module remove If we remove the module which will call mpc52xx_spi_remove it will free 'ms' through spi_unregister_controller. while the work ms->work will be used. The sequence of operations that may lead to a UAF bug. Fix it by ensuring that the work is canceled before proceeding with the cleanup in mpc52xx_spi_remove. Fixes: ca632f556697 ("spi: reorganize drivers") Signed-off-by: Pei Xiao Link: https://patch.msgid.link/1f16f8ae0e50ca9adb1dc849bf2ac65a40c9ceb9.1732783000.git.xiaopei01@kylinos.cn Signed-off-by: Mark Brown --- drivers/spi/spi-mpc52xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c index 036bfb7bf1890..6d4dde15ac545 100644 --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@ -520,6 +520,7 @@ static void mpc52xx_spi_remove(struct platform_device *op) struct mpc52xx_spi *ms = spi_controller_get_devdata(host); int i; + cancel_work_sync(&ms->work); free_irq(ms->irq0, ms); free_irq(ms->irq1, ms); From 1b299bd0c22887543b276bcc5b4ed26f5bd83ae4 Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Wed, 27 Nov 2024 23:18:29 +0100 Subject: [PATCH 141/366] spi: apple: Set use_gpio_descriptors to true There is at least one peripheral that is attached to this controller and can not use native CS. Make it possible to use a GPIO instead. Signed-off-by: Sasha Finkelstein Reviewed-by: Janne Grunau Link: https://patch.msgid.link/20241127-gpio-descs-v1-1-c586b518a7d5@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-apple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-apple.c b/drivers/spi/spi-apple.c index d4b126c8701a6..6273352a2b286 100644 --- a/drivers/spi/spi-apple.c +++ b/drivers/spi/spi-apple.c @@ -493,6 +493,7 @@ static int apple_spi_probe(struct platform_device *pdev) ctlr->prepare_message = apple_spi_prepare_message; ctlr->set_cs = apple_spi_set_cs; ctlr->transfer_one = apple_spi_transfer_one; + ctlr->use_gpio_descriptors = true; ctlr->auto_runtime_pm = true; pm_runtime_set_active(&pdev->dev); From 55dc2f8f263448f1e6c7ef135d08e640d5a4826e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 2 Dec 2024 16:42:07 +0800 Subject: [PATCH 142/366] LoongArch: Fix reserving screen info memory for above-4G firmware Since screen_info.lfb_base is a __u32 type, an above-4G address need an ext_lfb_base to present its higher 32bits. In init_screen_info() we can use __screen_info_lfb_base() to handle this case for reserving screen info memory. Signed-off-by: Xuefeng Zhao Signed-off-by: Jianmin Lv Signed-off-by: Tianyang Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 2bf86aeda874c..de21e72759eeb 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -95,7 +95,7 @@ static void __init init_screen_info(void) memset(si, 0, sizeof(*si)); early_memunmap(si, sizeof(*si)); - memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); + memblock_reserve(__screen_info_lfb_base(&screen_info), screen_info.lfb_size); } void __init efi_init(void) From ad2a05a6d287aef7e069c06e329f1355756415c2 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Mon, 2 Dec 2024 16:42:08 +0800 Subject: [PATCH 143/366] LoongArch/irq: Use seq_put_decimal_ull_width() for decimal values Performance improvement for reading /proc/interrupts on LoongArch. On a system with n CPUs and m interrupts, there will be n*m decimal values yielded via seq_printf(.."%10u "..) which is less efficient than seq_put_decimal_ull_width(), stress reading /proc/interrupts indicates ~30% performance improvement with this patch (and its friends). Signed-off-by: David Wang <00107082@163.com> Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5d59e9ce2772d..fbf747447f13f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -82,7 +82,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10); seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); } } From 7cd1f5f77925ae905a57296932f0f9ef0dc364f8 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 2 Dec 2024 16:42:08 +0800 Subject: [PATCH 144/366] LoongArch: Add architecture specific huge_pte_clear() When executing mm selftests run_vmtests.sh, there is such an error: BUG: Bad page state in process uffd-unit-tests pfn:00000 page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x0 flags: 0xffff0000002000(reserved|node=0|zone=0|lastcpupid=0xffff) raw: 00ffff0000002000 ffffbf0000000008 ffffbf0000000008 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set Modules linked in: snd_seq_dummy snd_seq snd_seq_device rfkill vfat fat virtio_balloon efi_pstore virtio_net pstore net_failover failover fuse nfnetlink virtio_scsi virtio_gpu virtio_dma_buf dm_multipath efivarfs CPU: 2 UID: 0 PID: 1913 Comm: uffd-unit-tests Not tainted 6.12.0 #184 Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 Stack : 900000047c8ac000 0000000000000000 9000000000223a7c 900000047c8ac000 900000047c8af690 900000047c8af698 0000000000000000 900000047c8af7d8 900000047c8af7d0 900000047c8af7d0 900000047c8af5b0 0000000000000001 0000000000000001 900000047c8af698 10b3c7d53da40d26 0000010000000000 0000000000000022 0000000fffffffff fffffffffe000000 ffff800000000000 000000000000002f 0000800000000000 000000017a6d4000 90000000028f8940 0000000000000000 0000000000000000 90000000025aa5e0 9000000002905000 0000000000000000 90000000028f8940 ffff800000000000 0000000000000000 0000000000000000 0000000000000000 9000000000223a94 000000012001839c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<9000000000223a94>] show_stack+0x5c/0x180 [<9000000001c3fd64>] dump_stack_lvl+0x6c/0xa0 [<900000000056aa08>] bad_page+0x1a0/0x1f0 [<9000000000574978>] free_unref_folios+0xbf0/0xd20 [<90000000004e65cc>] folios_put_refs+0x1a4/0x2b8 [<9000000000599a0c>] free_pages_and_swap_cache+0x164/0x260 [<9000000000547698>] tlb_batch_pages_flush+0xa8/0x1c0 [<9000000000547f30>] tlb_finish_mmu+0xa8/0x218 [<9000000000543cb8>] exit_mmap+0x1a0/0x360 [<9000000000247658>] __mmput+0x78/0x200 [<900000000025583c>] do_exit+0x43c/0xde8 [<9000000000256490>] do_group_exit+0x68/0x110 [<9000000000256554>] sys_exit_group+0x1c/0x20 [<9000000001c413b4>] do_syscall+0x94/0x130 [<90000000002216d8>] handle_syscall+0xb8/0x158 Disabling lock debugging due to kernel taint BUG: non-zero pgtables_bytes on freeing mm: -16384 On LoongArch system, invalid huge pte entry should be invalid_pte_table or a single _PAGE_HUGE bit rather than a zero value. And it should be the same with invalid pmd entry, since pmd_none() is called by function free_pgd_range() and pmd_none() return 0 by huge_pte_clear(). So single _PAGE_HUGE bit is also treated as a valid pte table and free_pte_range() will be called in free_pmd_range(). free_pmd_range() pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; free_pte_range(tlb, pmd, addr); } while (pmd++, addr = next, addr != end); Here invalid_pte_table is used for both invalid huge pte entry and pmd entry. Cc: stable@vger.kernel.org Fixes: 09cfefb7fa70 ("LoongArch: Add memory management") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hugetlb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index b837c65a4894e..c8e4057734d0d 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -24,6 +24,16 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_t clear; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); +} + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) From c1474bb0b7cff4e8481095bd0618b8f6c2f0aeb4 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 2 Dec 2024 16:42:08 +0800 Subject: [PATCH 145/366] LoongArch: BPF: Adjust the parameter of emit_jirl() The branch instructions beq, bne, blt, bge, bltu, bgeu and jirl belong to the format reg2i16, but the sequence of oprand is different for the instruction jirl. So adjust the parameter order of emit_jirl() to make it more readable correspond with the Instruction Set Architecture manual. Here are the instruction formats: beq rj, rd, offs16 bne rj, rd, offs16 blt rj, rd, offs16 bge rj, rd, offs16 bltu rj, rd, offs16 bgeu rj, rd, offs16 jirl rd, rj, offs16 Link: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#branch-instructions Suggested-by: Huacai Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 12 +++++++++++- arch/loongarch/kernel/inst.c | 2 +- arch/loongarch/net/bpf_jit.c | 6 +++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 944482063f14e..3089785ca97e7 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -683,7 +683,17 @@ DEF_EMIT_REG2I16_FORMAT(blt, blt_op) DEF_EMIT_REG2I16_FORMAT(bge, bge_op) DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) -DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, + enum loongarch_gpr rj, + int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.immediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} #define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 3050329556d11..14d7d700bcb98 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -332,7 +332,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) return INSN_BREAK; } - emit_jirl(&insn, rj, rd, imm >> 2); + emit_jirl(&insn, rd, rj, imm >> 2); return insn.word; } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index dd350cba1252f..ea357a3edc094 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -181,13 +181,13 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) /* Set return value */ emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0); /* Return to the caller */ - emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); } else { /* * Call the next bpf prog and skip the first instruction * of TCC initialization. */ - emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1); } } @@ -904,7 +904,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; move_addr(ctx, t1, func_addr); - emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); break; From 589e6cc7597655bed7b8543b8286925f631f597c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 2 Dec 2024 16:42:10 +0800 Subject: [PATCH 146/366] LoongArch: KVM: Protect kvm_check_requests() with SRCU When we enable lockdep we get such a warning: ============================= WARNING: suspicious RCU usage 6.12.0-rc7+ #1891 Tainted: G W ----------------------------- include/linux/kvm_host.h:1043 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by qemu-system-loo/948: #0: 90000001184a00a8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0xf4/0xe20 [kvm] stack backtrace: CPU: 0 UID: 0 PID: 948 Comm: qemu-system-loo Tainted: G W 6.12.0-rc7+ #1891 Tainted: [W]=WARN Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.0-prebeta9 10/21/2022 Stack : 0000000000000089 9000000005a0db9c 90000000071519c8 900000012c578000 900000012c57b920 0000000000000000 900000012c57b928 9000000007e53788 900000000815bcc8 900000000815bcc0 900000012c57b790 0000000000000001 0000000000000001 4b031894b9d6b725 0000000004dec000 90000001003299c0 0000000000000414 0000000000000001 000000000000002d 0000000000000003 0000000000000030 00000000000003b4 0000000004dec000 90000001184a0000 900000000806d000 9000000007e53788 00000000000000b4 0000000000000004 0000000000000004 0000000000000000 0000000000000000 9000000107baf600 9000000008916000 9000000007e53788 9000000005924778 0000000010000044 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<9000000005924778>] show_stack+0x38/0x180 [<90000000071519c4>] dump_stack_lvl+0x94/0xe4 [<90000000059eb754>] lockdep_rcu_suspicious+0x194/0x240 [] kvm_gfn_to_hva_cache_init+0xfc/0x120 [kvm] [] kvm_pre_enter_guest+0x3a4/0x520 [kvm] [] kvm_handle_exit+0x23c/0x480 [kvm] Fix it by protecting kvm_check_requests() with SRCU. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index cab1818be68d8..d18a4a2704150 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -240,7 +240,7 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu) */ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; /* * Check conditions before entering the guest @@ -249,7 +249,9 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) if (ret < 0) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_check_requests(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } From a8c695005bfe6569acd73d777ca298ddddd66105 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 5 Nov 2024 12:48:23 +0300 Subject: [PATCH 147/366] can: j1939: j1939_session_new(): fix skb reference counting Since j1939_session_skb_queue() does an extra skb_get() for each new skb, do the same for the initial one in j1939_session_new() to avoid refcount underflow. Reported-by: syzbot+d4e8dc385d9258220c31@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d4e8dc385d9258220c31 Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Dmitry Antipov Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20241105094823.2403806-1-dmantipov@yandex.ru [mkl: clean up commit message] Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 319f47df33300..95f7a7e65a73f 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1505,7 +1505,7 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, session->state = J1939_SESSION_NEW; skb_queue_head_init(&session->skb_queue); - skb_queue_tail(&session->skb_queue, skb); + skb_queue_tail(&session->skb_queue, skb_get(skb)); skcb = j1939_skb_to_cb(skb); memcpy(&session->skcb, skcb, sizeof(session->skcb)); From e2974a220594c06f536e65dfd7b2447e0e83a1cb Mon Sep 17 00:00:00 2001 From: Sahas Leelodharry Date: Mon, 2 Dec 2024 03:28:33 +0000 Subject: [PATCH 148/366] ALSA: hda/realtek: Add support for Samsung Galaxy Book3 360 (NP730QFG) Fixes the 3.5mm headphone jack on the Samsung Galaxy Book 3 360 NP730QFG laptop. Unlike the other Galaxy Book3 series devices, this device only needs the ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET quirk. Verified changes on the device and compared with codec state in Windows. [ white-space fixes by tiwai ] Signed-off-by: Sahas Leelodharry Cc: Link: https://patch.msgid.link/QB1PR01MB40047D4CC1282DB7F1333124CC352@QB1PR01MB4004.CANPRD01.PROD.OUTLOOK.COM Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 877c5d20ac774..faad84c345eb9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10677,6 +10677,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xca06, "Samsung Galaxy Book3 360 (NP730QFG)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc870, "Samsung Galaxy Book2 Pro (NP950XED)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), SND_PCI_QUIRK(0x144d, 0xc872, "Samsung Galaxy Book2 Pro (NP950XEE)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), From 49ccf2c3cafb5774a4562d61294afcf492bed487 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 19 Nov 2024 12:09:14 -0800 Subject: [PATCH 149/366] arm64: mte: set VM_MTE_ALLOWED for hugetlbfs at correct place The commit 5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour") moved vm flags validation before fop->mmap for file mappings. But when commit 25c17c4b55de ("hugetlb: arm64: add mte support") was rebased on top of it, the hugetlbfs part was missed. Mmapping hugetlbfs file may not have MAP_HUGETLB set. Fixes: 25c17c4b55de ("hugetlb: arm64: add mte support") Signed-off-by: Yang Shi Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241119200914.1145249-1-yang@os.amperecomputing.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mman.h | 3 ++- fs/hugetlbfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 1d53022fc7e1f..21df8bbd2668a 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -7,6 +7,7 @@ #ifndef BUILD_VDSO #include #include +#include #include #include @@ -44,7 +45,7 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file, if (system_supports_mte()) { if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) return VM_MTE_ALLOWED; - if (shmem_file(file)) + if (shmem_file(file) || is_file_hugepages(file)) return VM_MTE_ALLOWED; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a4441fb77f7c1..90f883d6b8fd8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -113,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap unwinds (may be important on powerpc * and ia64). */ - vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND | VM_MTE_ALLOWED); + vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); vma->vm_ops = &hugetlb_vm_ops; ret = seal_check_write(info->seals, vma); From 28866d6e84b8d36a76b2cde221391aed1294e5cd Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 26 Nov 2024 17:14:31 +0530 Subject: [PATCH 150/366] octeontx2-af: Fix SDP MAC link credits configuration Current driver allows only packet size < 512B as SDP_LINK_CREDIT register is set to default value. This patch fixes this issue by configure the register with maximum HW supported value to allow packet size > 512B. Fixes: 2f7f33a09516 ("octeontx2-pf: Add representors for sdp MAC") Signed-off-by: Geetha sowjanya Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/common.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 5d84386ed22da..406c59100a35a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -159,6 +159,7 @@ enum nix_scheduler { #define SDP_HW_MIN_FRS 16 #define CN10K_LMAC_LINK_MAX_FRS 16380 /* 16k - FCS */ #define CN10K_LBK_LINK_MAX_FRS 65535 /* 64k */ +#define SDP_LINK_CREDIT 0x320202 /* NIX RX action operation*/ #define NIX_RX_ACTIONOP_DROP (0x0ull) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 5d5a01dbbca11..a5d1e2bddd58d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4672,6 +4672,9 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, rvu_get_lbk_link_max_frs(rvu, &lbk_max_frs); rvu_get_lmac_link_max_frs(rvu, &lmac_max_frs); + /* Set SDP link credit */ + rvu_write64(rvu, blkaddr, NIX_AF_SDP_LINK_CREDIT, SDP_LINK_CREDIT); + /* Set default min/max packet lengths allowed on NIX Rx links. * * With HW reset minlen value of 60byte, HW will treat ARP pkts From 6d544ea21d367cbd9746ae882e67a839391a6594 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Nov 2024 13:09:06 +0300 Subject: [PATCH 151/366] ASoC: SOF: ipc3-topology: fix resource leaks in sof_ipc3_widget_setup_comp_dai() These error paths should free comp_dai before returning. Fixes: 909dadf21aae ("ASoC: SOF: topology: Make DAI widget parsing IPC agnostic") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/67d185cf-d139-4f8c-970a-dbf0542246a8@stanley.mountain Signed-off-by: Mark Brown --- sound/soc/sof/ipc3-topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index c2fce554a674b..e98b53b67d12b 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -1588,14 +1588,14 @@ static int sof_ipc3_widget_setup_comp_dai(struct snd_sof_widget *swidget) ret = sof_update_ipc_object(scomp, comp_dai, SOF_DAI_TOKENS, swidget->tuples, swidget->num_tuples, sizeof(*comp_dai), 1); if (ret < 0) - goto free; + goto free_comp; /* update comp_tokens */ ret = sof_update_ipc_object(scomp, &comp_dai->config, SOF_COMP_TOKENS, swidget->tuples, swidget->num_tuples, sizeof(comp_dai->config), 1); if (ret < 0) - goto free; + goto free_comp; /* Subtract the base to match the FW dai index. */ if (comp_dai->type == SOF_DAI_INTEL_ALH) { @@ -1603,7 +1603,8 @@ static int sof_ipc3_widget_setup_comp_dai(struct snd_sof_widget *swidget) dev_err(sdev->dev, "Invalid ALH dai index %d, only Pin numbers >= %d can be used\n", comp_dai->dai_index, INTEL_ALH_DAI_INDEX_BASE); - return -EINVAL; + ret = -EINVAL; + goto free_comp; } comp_dai->dai_index -= INTEL_ALH_DAI_INDEX_BASE; } From 7f0fa47ceebcff0e3591bb7e32a71a2cd7846149 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 18 Nov 2024 22:00:49 +0100 Subject: [PATCH 152/366] mmc: sdhci-pci: Add DMI quirk for missing CD GPIO on Vexia Edu Atla 10 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Vexia Edu Atla 10 tablet distributed to schools in the Spanish Andalucía region has no ACPI fwnode associated with the SDHCI controller for its microsd-slot and thus has no ACPI GPIO resource info. This causes the following error to be logged and the slot to not work: [ 10.572113] sdhci-pci 0000:00:12.0: failed to setup card detect gpio Add a DMI quirk table for providing gpiod_lookup_tables with manually provided CD GPIO info and use this DMI table to provide the CD GPIO info on this tablet. This fixes the microsd-slot not working. Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Message-ID: <20241118210049.311079-1-hdegoede@redhat.com> Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 72 +++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci-pci.h | 1 + 2 files changed, 73 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 2b300bc4a701f..1f0bd723f0112 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1236,6 +1237,29 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { .priv_size = sizeof(struct intel_host), }; +/* DMI quirks for devices with missing or broken CD GPIO info */ +static const struct gpiod_lookup_table vexia_edu_atla10_cd_gpios = { + .dev_id = "0000:00:12.0", + .table = { + GPIO_LOOKUP("INT33FC:00", 38, "cd", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static const struct dmi_system_id sdhci_intel_byt_cd_gpio_override[] = { + { + /* Vexia Edu Atla 10 tablet 9V version */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "08/25/2014"), + }, + .driver_data = (void *)&vexia_edu_atla10_cd_gpios, + }, + { } +}; + static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { #ifdef CONFIG_PM_SLEEP .resume = byt_resume, @@ -1254,6 +1278,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { .add_host = byt_add_host, .remove_slot = byt_remove_slot, .ops = &sdhci_intel_byt_ops, + .cd_gpio_override = sdhci_intel_byt_cd_gpio_override, .priv_size = sizeof(struct intel_host), }; @@ -2055,6 +2080,42 @@ static const struct dev_pm_ops sdhci_pci_pm_ops = { * * \*****************************************************************************/ +static struct gpiod_lookup_table *sdhci_pci_add_gpio_lookup_table( + struct sdhci_pci_chip *chip) +{ + struct gpiod_lookup_table *cd_gpio_lookup_table; + const struct dmi_system_id *dmi_id = NULL; + size_t count; + + if (chip->fixes && chip->fixes->cd_gpio_override) + dmi_id = dmi_first_match(chip->fixes->cd_gpio_override); + + if (!dmi_id) + return NULL; + + cd_gpio_lookup_table = dmi_id->driver_data; + for (count = 0; cd_gpio_lookup_table->table[count].key; count++) + ; + + cd_gpio_lookup_table = kmemdup(dmi_id->driver_data, + /* count + 1 terminating entry */ + struct_size(cd_gpio_lookup_table, table, count + 1), + GFP_KERNEL); + if (!cd_gpio_lookup_table) + return ERR_PTR(-ENOMEM); + + gpiod_add_lookup_table(cd_gpio_lookup_table); + return cd_gpio_lookup_table; +} + +static void sdhci_pci_remove_gpio_lookup_table(struct gpiod_lookup_table *lookup_table) +{ + if (lookup_table) { + gpiod_remove_lookup_table(lookup_table); + kfree(lookup_table); + } +} + static struct sdhci_pci_slot *sdhci_pci_probe_slot( struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar, int slotno) @@ -2130,8 +2191,19 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( device_init_wakeup(&pdev->dev, true); if (slot->cd_idx >= 0) { + struct gpiod_lookup_table *cd_gpio_lookup_table; + + cd_gpio_lookup_table = sdhci_pci_add_gpio_lookup_table(chip); + if (IS_ERR(cd_gpio_lookup_table)) { + ret = PTR_ERR(cd_gpio_lookup_table); + goto remove; + } + ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx, slot->cd_override_level, 0); + + sdhci_pci_remove_gpio_lookup_table(cd_gpio_lookup_table); + if (ret && ret != -EPROBE_DEFER) ret = mmc_gpiod_request_cd(host->mmc, NULL, slot->cd_idx, diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index e807c039a8b1e..f38f0bd4165c3 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -157,6 +157,7 @@ struct sdhci_pci_fixes { #endif const struct sdhci_ops *ops; + const struct dmi_system_id *cd_gpio_override; size_t priv_size; }; From 87a0d90fcd31c0f36da0332428c9e1a1e0f97432 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Nov 2024 13:24:46 +0100 Subject: [PATCH 153/366] mmc: core: Further prevent card detect during shutdown Disabling card detect from the host's ->shutdown_pre() callback turned out to not be the complete solution. More precisely, beyond the point when the mmc_bus->shutdown() has been called, to gracefully power off the card, we need to prevent card detect. Otherwise the mmc_rescan work may poll for the card with a CMD13, to see if it's still alive, which then will fail and hang as the card has already been powered off. To fix this problem, let's disable mmc_rescan prior to power off the card during shutdown. Reported-by: Anthony Pighin Fixes: 66c915d09b94 ("mmc: core: Disable card detect during shutdown") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Reviewed-by: Adrian Hunter Closes: https://lore.kernel.org/all/BN0PR08MB695133000AF116F04C3A9FFE83212@BN0PR08MB6951.namprd08.prod.outlook.com/ Tested-by: Anthony Pighin Message-ID: <20241125122446.18684-1-ulf.hansson@linaro.org> --- drivers/mmc/core/bus.c | 2 ++ drivers/mmc/core/core.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 9283b28bc69fe..1cf64e0952fbe 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -149,6 +149,8 @@ static void mmc_bus_shutdown(struct device *dev) if (dev->driver && drv->shutdown) drv->shutdown(card); + __mmc_stop_host(host); + if (host->bus_ops->shutdown) { ret = host->bus_ops->shutdown(host); if (ret) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index a499f3c59de55..d996d39c0d6fa 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2335,6 +2335,9 @@ void mmc_start_host(struct mmc_host *host) void __mmc_stop_host(struct mmc_host *host) { + if (host->rescan_disable) + return; + if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); disable_irq(host->slot.cd_irq); From f7d306b47a24367302bd4fe846854e07752ffcd9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 2 Dec 2024 15:57:54 +0300 Subject: [PATCH 154/366] ALSA: usb-audio: Fix a DMA to stack memory bug The usb_get_descriptor() function does DMA so we're not allowed to use a stack buffer for that. Doing DMA to the stack is not portable all architectures. Move the "new_device_descriptor" from being stored on the stack and allocate it with kmalloc() instead. Fixes: b909df18ce2a ("ALSA: usb-audio: Fix potential out-of-bound accesses for Extigy and Mbox devices") Cc: stable@kernel.org Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/60e3aa09-039d-46d2-934c-6f123026c2eb@stanley.mountain Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8bc959b60be3b..7c9d352864da0 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -555,7 +555,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interface *intf) { struct usb_host_config *config = dev->actconfig; - struct usb_device_descriptor new_device_descriptor; + struct usb_device_descriptor *new_device_descriptor __free(kfree) = NULL; int err; if (le16_to_cpu(get_cfg_desc(config)->wTotalLength) == EXTIGY_FIRMWARE_SIZE_OLD || @@ -566,15 +566,19 @@ static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interfac 0x10, 0x43, 0x0001, 0x000a, NULL, 0); if (err < 0) dev_dbg(&dev->dev, "error sending boot message: %d\n", err); + + new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL); + if (!new_device_descriptor) + return -ENOMEM; err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &new_device_descriptor, sizeof(new_device_descriptor)); + new_device_descriptor, sizeof(*new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); - if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations) dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", - new_device_descriptor.bNumConfigurations); + new_device_descriptor->bNumConfigurations); else - memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); + memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor)); err = usb_reset_configuration(dev); if (err < 0) dev_dbg(&dev->dev, "error usb_reset_configuration: %d\n", err); @@ -906,7 +910,7 @@ static void mbox2_setup_48_24_magic(struct usb_device *dev) static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) { struct usb_host_config *config = dev->actconfig; - struct usb_device_descriptor new_device_descriptor; + struct usb_device_descriptor *new_device_descriptor __free(kfree) = NULL; int err; u8 bootresponse[0x12]; int fwsize; @@ -941,15 +945,19 @@ static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) dev_dbg(&dev->dev, "device initialised!\n"); + new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL); + if (!new_device_descriptor) + return -ENOMEM; + err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &new_device_descriptor, sizeof(new_device_descriptor)); + new_device_descriptor, sizeof(*new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); - if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations) dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", - new_device_descriptor.bNumConfigurations); + new_device_descriptor->bNumConfigurations); else - memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); + memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor)); err = usb_reset_configuration(dev); if (err < 0) @@ -1259,7 +1267,7 @@ static void mbox3_setup_defaults(struct usb_device *dev) static int snd_usb_mbox3_boot_quirk(struct usb_device *dev) { struct usb_host_config *config = dev->actconfig; - struct usb_device_descriptor new_device_descriptor; + struct usb_device_descriptor *new_device_descriptor __free(kfree) = NULL; int err; int descriptor_size; @@ -1272,15 +1280,19 @@ static int snd_usb_mbox3_boot_quirk(struct usb_device *dev) dev_dbg(&dev->dev, "MBOX3: device initialised!\n"); + new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL); + if (!new_device_descriptor) + return -ENOMEM; + err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &new_device_descriptor, sizeof(new_device_descriptor)); + new_device_descriptor, sizeof(*new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "MBOX3: error usb_get_descriptor: %d\n", err); - if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations) dev_dbg(&dev->dev, "MBOX3: error too large bNumConfigurations: %d\n", - new_device_descriptor.bNumConfigurations); + new_device_descriptor->bNumConfigurations); else - memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); + memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor)); err = usb_reset_configuration(dev); if (err < 0) From f09f0397db641f99f6c3e109283d82e3584bfb50 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Mon, 2 Dec 2024 22:17:15 +0900 Subject: [PATCH 155/366] ALSA: usb-audio: Add extra PID for RME Digiface USB It seems there is an alternate version of the hardware with a different PID. User testing reveals this still works with the same interface as far as the kernel is concerned, so just add the extra PID. Thanks to Heiko Engemann for testing with this version. Due to the way quirks-table.h is structured, that means we have to turn the entire quirk struct into a macro to avoid duplicating it... Cc: stable@vger.kernel.org Signed-off-by: Asahi Lina Link: https://patch.msgid.link/20241202-rme-digiface-usb-id-v1-1-50f730d7a46e@asahilina.net Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 1 + sound/usb/quirks-table.h | 341 ++++++++++++++++++++------------------- sound/usb/quirks.c | 2 + 3 files changed, 176 insertions(+), 168 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 8bbf070b36762..23fcd680167d0 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -4116,6 +4116,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) err = snd_bbfpro_controls_create(mixer); break; case USB_ID(0x2a39, 0x3f8c): /* RME Digiface USB */ + case USB_ID(0x2a39, 0x3fa0): /* RME Digiface USB (alternate) */ err = snd_rme_digiface_controls_create(mixer); break; case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index c49383e64678c..8954be23325c5 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3673,176 +3673,181 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, -{ - /* Only claim interface 0 */ - .match_flags = USB_DEVICE_ID_MATCH_VENDOR | - USB_DEVICE_ID_MATCH_PRODUCT | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_NUMBER, - .idVendor = 0x2a39, - .idProduct = 0x3f8c, - .bInterfaceClass = USB_CLASS_VENDOR_SPEC, - .bInterfaceNumber = 0, - QUIRK_DRIVER_INFO { - QUIRK_DATA_COMPOSITE { +#define QUIRK_RME_DIGIFACE(pid) \ +{ \ + /* Only claim interface 0 */ \ + .match_flags = USB_DEVICE_ID_MATCH_VENDOR | \ + USB_DEVICE_ID_MATCH_PRODUCT | \ + USB_DEVICE_ID_MATCH_INT_CLASS | \ + USB_DEVICE_ID_MATCH_INT_NUMBER, \ + .idVendor = 0x2a39, \ + .idProduct = pid, \ + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ + .bInterfaceNumber = 0, \ + QUIRK_DRIVER_INFO { \ + QUIRK_DATA_COMPOSITE { \ /* * Three modes depending on sample rate band, * with different channel counts for in/out - */ - { QUIRK_DATA_STANDARD_MIXER(0) }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 34, // outputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x02, - .ep_idx = 1, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_32000 | - SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000, - .rate_min = 32000, - .rate_max = 48000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 32000, 44100, 48000, - }, - .sync_ep = 0x81, - .sync_iface = 0, - .sync_altsetting = 1, - .sync_ep_idx = 0, - .implicit_fb = 1, - }, - }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 18, // outputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x02, - .ep_idx = 1, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_64000 | - SNDRV_PCM_RATE_88200 | - SNDRV_PCM_RATE_96000, - .rate_min = 64000, - .rate_max = 96000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 64000, 88200, 96000, - }, - .sync_ep = 0x81, - .sync_iface = 0, - .sync_altsetting = 1, - .sync_ep_idx = 0, - .implicit_fb = 1, - }, - }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 10, // outputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x02, - .ep_idx = 1, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_KNOT | - SNDRV_PCM_RATE_176400 | - SNDRV_PCM_RATE_192000, - .rate_min = 128000, - .rate_max = 192000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 128000, 176400, 192000, - }, - .sync_ep = 0x81, - .sync_iface = 0, - .sync_altsetting = 1, - .sync_ep_idx = 0, - .implicit_fb = 1, - }, - }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 32, // inputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x81, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_32000 | - SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000, - .rate_min = 32000, - .rate_max = 48000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 32000, 44100, 48000, - } - } - }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 16, // inputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x81, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_64000 | - SNDRV_PCM_RATE_88200 | - SNDRV_PCM_RATE_96000, - .rate_min = 64000, - .rate_max = 96000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 64000, 88200, 96000, - } - } - }, - { - QUIRK_DATA_AUDIOFORMAT(0) { - .formats = SNDRV_PCM_FMTBIT_S32_LE, - .channels = 8, // inputs - .fmt_bits = 24, - .iface = 0, - .altsetting = 1, - .altset_idx = 1, - .endpoint = 0x81, - .ep_attr = USB_ENDPOINT_XFER_ISOC | - USB_ENDPOINT_SYNC_ASYNC, - .rates = SNDRV_PCM_RATE_KNOT | - SNDRV_PCM_RATE_176400 | - SNDRV_PCM_RATE_192000, - .rate_min = 128000, - .rate_max = 192000, - .nr_rates = 3, - .rate_table = (unsigned int[]) { - 128000, 176400, 192000, - } - } - }, - QUIRK_COMPOSITE_END - } - } -}, + */ \ + { QUIRK_DATA_STANDARD_MIXER(0) }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 34, /* outputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x02, \ + .ep_idx = 1, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_32000 | \ + SNDRV_PCM_RATE_44100 | \ + SNDRV_PCM_RATE_48000, \ + .rate_min = 32000, \ + .rate_max = 48000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 32000, 44100, 48000, \ + }, \ + .sync_ep = 0x81, \ + .sync_iface = 0, \ + .sync_altsetting = 1, \ + .sync_ep_idx = 0, \ + .implicit_fb = 1, \ + }, \ + }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 18, /* outputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x02, \ + .ep_idx = 1, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_64000 | \ + SNDRV_PCM_RATE_88200 | \ + SNDRV_PCM_RATE_96000, \ + .rate_min = 64000, \ + .rate_max = 96000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 64000, 88200, 96000, \ + }, \ + .sync_ep = 0x81, \ + .sync_iface = 0, \ + .sync_altsetting = 1, \ + .sync_ep_idx = 0, \ + .implicit_fb = 1, \ + }, \ + }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 10, /* outputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x02, \ + .ep_idx = 1, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_KNOT | \ + SNDRV_PCM_RATE_176400 | \ + SNDRV_PCM_RATE_192000, \ + .rate_min = 128000, \ + .rate_max = 192000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 128000, 176400, 192000, \ + }, \ + .sync_ep = 0x81, \ + .sync_iface = 0, \ + .sync_altsetting = 1, \ + .sync_ep_idx = 0, \ + .implicit_fb = 1, \ + }, \ + }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 32, /* inputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x81, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_32000 | \ + SNDRV_PCM_RATE_44100 | \ + SNDRV_PCM_RATE_48000, \ + .rate_min = 32000, \ + .rate_max = 48000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 32000, 44100, 48000, \ + } \ + } \ + }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 16, /* inputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x81, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_64000 | \ + SNDRV_PCM_RATE_88200 | \ + SNDRV_PCM_RATE_96000, \ + .rate_min = 64000, \ + .rate_max = 96000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 64000, 88200, 96000, \ + } \ + } \ + }, \ + { \ + QUIRK_DATA_AUDIOFORMAT(0) { \ + .formats = SNDRV_PCM_FMTBIT_S32_LE, \ + .channels = 8, /* inputs */ \ + .fmt_bits = 24, \ + .iface = 0, \ + .altsetting = 1, \ + .altset_idx = 1, \ + .endpoint = 0x81, \ + .ep_attr = USB_ENDPOINT_XFER_ISOC | \ + USB_ENDPOINT_SYNC_ASYNC, \ + .rates = SNDRV_PCM_RATE_KNOT | \ + SNDRV_PCM_RATE_176400 | \ + SNDRV_PCM_RATE_192000, \ + .rate_min = 128000, \ + .rate_max = 192000, \ + .nr_rates = 3, \ + .rate_table = (unsigned int[]) { \ + 128000, 176400, 192000, \ + } \ + } \ + }, \ + QUIRK_COMPOSITE_END \ + } \ + } \ +} + +QUIRK_RME_DIGIFACE(0x3f8c), +QUIRK_RME_DIGIFACE(0x3fa0), + #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7c9d352864da0..00101875d9a8d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1665,6 +1665,7 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev, return snd_usb_motu_microbookii_boot_quirk(dev); break; case USB_ID(0x2a39, 0x3f8c): /* RME Digiface USB */ + case USB_ID(0x2a39, 0x3fa0): /* RME Digiface USB (alternate) */ return snd_usb_rme_digiface_boot_quirk(dev); } @@ -1878,6 +1879,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, mbox3_set_format_quirk(subs, fmt); /* Digidesign Mbox 3 */ break; case USB_ID(0x2a39, 0x3f8c): /* RME Digiface USB */ + case USB_ID(0x2a39, 0x3fa0): /* RME Digiface USB (alternate) */ rme_digiface_set_format_quirk(subs); break; } From 0d08f0eec961acdb0424a3e2cfb37cfb89154833 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 2 Dec 2024 22:46:59 +0800 Subject: [PATCH 156/366] ALSA: hda/realtek: fix micmute LEDs don't work on HP Laptops These HP laptops use Realtek HDA codec ALC3315 combined CS35L56 Amplifiers. They need the quirk ALC285_FIXUP_HP_GPIO_LED to get the micmute LED working. Signed-off-by: Chris Chiu Reviewed-by: Simon Trimmer Cc: Link: https://patch.msgid.link/20241202144659.1553504-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index faad84c345eb9..37e1141e201e5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10522,7 +10522,13 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8e18, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firelfy 14 G12A", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From f00582aa4a449dbaefa8df9bb5f3b713928e5a1b Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Tue, 1 Oct 2024 19:27:19 +0800 Subject: [PATCH 157/366] dt-bindings: power: mediatek: Add another nested power-domain layer The MT8188 SoC has a more in-depth power-domain tree, and the CHECK_DTBS=y check could fail because the current MediaTek power dt-binding is insufficient to cover its CAM_SUBA and CAM_SUBB sub-domains. Add one more nested power-domain layer to pass the check. Acked-by: Rob Herring (Arm) Signed-off-by: Fei Shao Message-ID: <20241001113052.3124869-2-fshao@chromium.org> Signed-off-by: Ulf Hansson --- .../devicetree/bindings/power/mediatek,power-controller.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml index 6d37c06b2f65b..591a080ca3ff0 100644 --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml @@ -55,6 +55,10 @@ patternProperties: patternProperties: "^power-domain@[0-9a-f]+$": $ref: "#/$defs/power-domain-node" + patternProperties: + "^power-domain@[0-9a-f]+$": + $ref: "#/$defs/power-domain-node" + unevaluatedProperties: false unevaluatedProperties: false unevaluatedProperties: false unevaluatedProperties: false From b8f7bbd1f4ecff6d6277b8c454f62bb0a1c6dbe4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 22 Nov 2024 14:42:02 +0100 Subject: [PATCH 158/366] pmdomain: core: Add missing put_device() When removing a genpd we don't clean up the genpd->dev correctly. Let's add the missing put_device() in genpd_free_data() to fix this. Fixes: 401ea1572de9 ("PM / Domain: Add struct device to genpd") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Message-ID: <20241122134207.157283-2-ulf.hansson@linaro.org> --- drivers/pmdomain/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index a6c8b85dd0247..4d8b0d18bb4a4 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -2183,6 +2183,7 @@ static int genpd_alloc_data(struct generic_pm_domain *genpd) static void genpd_free_data(struct generic_pm_domain *genpd) { + put_device(&genpd->dev); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); if (genpd->free_states) From 3e3b71d35a02cee4b2cc3d4255668a6609165518 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 22 Nov 2024 14:42:03 +0100 Subject: [PATCH 159/366] pmdomain: core: Fix error path in pm_genpd_init() when ida alloc fails When the ida allocation fails we need to free up the previously allocated memory before returning the error code. Let's fix this and while at it, let's also move the ida allocation to genpd_alloc_data() and the freeing to genpd_free_data(), as it better belongs there. Fixes: 899f44531fe6 ("pmdomain: core: Add GENPD_FLAG_DEV_NAME_FW flag") Signed-off-by: Ulf Hansson Message-ID: <20241122134207.157283-3-ulf.hansson@linaro.org> --- drivers/pmdomain/core.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 4d8b0d18bb4a4..bb11f467dc78c 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -2172,8 +2172,24 @@ static int genpd_alloc_data(struct generic_pm_domain *genpd) } genpd->gd = gd; - return 0; + device_initialize(&genpd->dev); + + if (!genpd_is_dev_name_fw(genpd)) { + dev_set_name(&genpd->dev, "%s", genpd->name); + } else { + ret = ida_alloc(&genpd_ida, GFP_KERNEL); + if (ret < 0) + goto put; + genpd->device_id = ret; + dev_set_name(&genpd->dev, "%s_%u", genpd->name, genpd->device_id); + } + + return 0; +put: + put_device(&genpd->dev); + if (genpd->free_states == genpd_free_default_power_state) + kfree(genpd->states); free: if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); @@ -2184,6 +2200,8 @@ static int genpd_alloc_data(struct generic_pm_domain *genpd) static void genpd_free_data(struct generic_pm_domain *genpd) { put_device(&genpd->dev); + if (genpd->device_id != -ENXIO) + ida_free(&genpd_ida, genpd->device_id); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); if (genpd->free_states) @@ -2272,20 +2290,6 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (ret) return ret; - device_initialize(&genpd->dev); - - if (!genpd_is_dev_name_fw(genpd)) { - dev_set_name(&genpd->dev, "%s", genpd->name); - } else { - ret = ida_alloc(&genpd_ida, GFP_KERNEL); - if (ret < 0) { - put_device(&genpd->dev); - return ret; - } - genpd->device_id = ret; - dev_set_name(&genpd->dev, "%s_%u", genpd->name, genpd->device_id); - } - mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); @@ -2326,8 +2330,6 @@ static int genpd_remove(struct generic_pm_domain *genpd) genpd_unlock(genpd); genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); - if (genpd->device_id != -ENXIO) - ida_free(&genpd_ida, genpd->device_id); genpd_free_data(genpd); pr_debug("%s: removed %s\n", __func__, dev_name(&genpd->dev)); From 2379fb937de5333991c567eefd7d11b98977d059 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 21 Nov 2024 15:52:31 +0800 Subject: [PATCH 160/366] pmdomain: imx: gpcv2: Adjust delay after power up handshake The udelay(5) is not enough, sometimes below kernel panic still be triggered: [ 4.012973] Kernel panic - not syncing: Asynchronous SError Interrupt [ 4.012976] CPU: 2 UID: 0 PID: 186 Comm: (udev-worker) Not tainted 6.12.0-rc2-0.0.0-devel-00004-g8b1b79e88956 #1 [ 4.012982] Hardware name: Toradex Verdin iMX8M Plus WB on Dahlia Board (DT) [ 4.012985] Call trace: [...] [ 4.013029] arm64_serror_panic+0x64/0x70 [ 4.013034] do_serror+0x3c/0x70 [ 4.013039] el1h_64_error_handler+0x30/0x54 [ 4.013046] el1h_64_error+0x64/0x68 [ 4.013050] clk_imx8mp_audiomix_runtime_resume+0x38/0x48 [ 4.013059] __genpd_runtime_resume+0x30/0x80 [ 4.013066] genpd_runtime_resume+0x114/0x29c [ 4.013073] __rpm_callback+0x48/0x1e0 [ 4.013079] rpm_callback+0x68/0x80 [ 4.013084] rpm_resume+0x3bc/0x6a0 [ 4.013089] __pm_runtime_resume+0x50/0x9c [ 4.013095] pm_runtime_get_suppliers+0x60/0x8c [ 4.013101] __driver_probe_device+0x4c/0x14c [ 4.013108] driver_probe_device+0x3c/0x120 [ 4.013114] __driver_attach+0xc4/0x200 [ 4.013119] bus_for_each_dev+0x7c/0xe0 [ 4.013125] driver_attach+0x24/0x30 [ 4.013130] bus_add_driver+0x110/0x240 [ 4.013135] driver_register+0x68/0x124 [ 4.013142] __platform_driver_register+0x24/0x30 [ 4.013149] sdma_driver_init+0x20/0x1000 [imx_sdma] [ 4.013163] do_one_initcall+0x60/0x1e0 [ 4.013168] do_init_module+0x5c/0x21c [ 4.013175] load_module+0x1a98/0x205c [ 4.013181] init_module_from_file+0x88/0xd4 [ 4.013187] __arm64_sys_finit_module+0x258/0x350 [ 4.013194] invoke_syscall.constprop.0+0x50/0xe0 [ 4.013202] do_el0_svc+0xa8/0xe0 [ 4.013208] el0_svc+0x3c/0x140 [ 4.013215] el0t_64_sync_handler+0x120/0x12c [ 4.013222] el0t_64_sync+0x190/0x194 [ 4.013228] SMP: stopping secondary CPUs The correct way is to wait handshake, but it needs BUS clock of BLK-CTL be enabled, which is in separate driver. So delay is the only option here. The udelay(10) is a data got by experiment. Fixes: e8dc41afca16 ("pmdomain: imx: gpcv2: Add delay after power up handshake") Reported-by: Francesco Dolcini Closes: https://lore.kernel.org/lkml/20241007132555.GA53279@francesco-nb/ Signed-off-by: Shengjiu Wang Cc: stable@vger.kernel.org Message-ID: <20241121075231.3910922-1-shengjiu.wang@nxp.com> Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/gpcv2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c index 6e6ecbf2e152f..760301ad8e39b 100644 --- a/drivers/pmdomain/imx/gpcv2.c +++ b/drivers/pmdomain/imx/gpcv2.c @@ -403,7 +403,7 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd) * already reaches target before udelay() */ regmap_read_bypassed(domain->regmap, domain->regs->hsk, ®_val); - udelay(5); + udelay(10); } /* Disable reset clocks for all devices in the domain */ From 3510398032b445abd034753ce86a60882f41fe27 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Sat, 23 Nov 2024 14:29:28 +0100 Subject: [PATCH 161/366] platform/x86: samsung-laptop: Match MODULE_DESCRIPTION() to functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change module description from "Samsung Backlight driver" to "Samsung Laptop driver" to better match driver's functionality. Signed-off-by: Sedat Dilek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20241123133041.16042-1-sedat.dilek@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/samsung-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index 0d3e3ca20b1bf..decde4c9a3d97 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1653,5 +1653,5 @@ module_init(samsung_init); module_exit(samsung_exit); MODULE_AUTHOR("Greg Kroah-Hartman "); -MODULE_DESCRIPTION("Samsung Backlight driver"); +MODULE_DESCRIPTION("Samsung Laptop driver"); MODULE_LICENSE("GPL"); From 25fb5f47f34d90aceda2c47a4230315536e97fa8 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 24 Nov 2024 18:19:41 +0100 Subject: [PATCH 162/366] platform/x86: asus-wmi: Ignore return value when writing thermal policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some machines like the ASUS Vivobook S14 writing the thermal policy returns the currently writen thermal policy instead of an error code. Ignore the return code to avoid falsely returning an error when the thermal policy was written successfully. Reported-by: auslands-kv@gmx.de Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219517 Fixes: 2daa86e78c49 ("platform/x86: asus_wmi: Support throttle thermal policy") Signed-off-by: Armin Wolf Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20241124171941.29789-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index ba8b6d028f9fe..8bd187e8b47f8 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -3696,7 +3696,6 @@ static int asus_wmi_custom_fan_curve_init(struct asus_wmi *asus) /* Throttle thermal policy ****************************************************/ static int throttle_thermal_policy_write(struct asus_wmi *asus) { - u32 retval; u8 value; int err; @@ -3718,8 +3717,8 @@ static int throttle_thermal_policy_write(struct asus_wmi *asus) value = asus->throttle_thermal_policy_mode; } - err = asus_wmi_set_devstate(asus->throttle_thermal_policy_dev, - value, &retval); + /* Some machines do not return an error code as a result, so we ignore it */ + err = asus_wmi_set_devstate(asus->throttle_thermal_policy_dev, value, NULL); sysfs_notify(&asus->platform_device->dev.kobj, NULL, "throttle_thermal_policy"); @@ -3729,12 +3728,6 @@ static int throttle_thermal_policy_write(struct asus_wmi *asus) return err; } - if (retval != 1) { - pr_warn("Failed to set throttle thermal policy (retval): 0x%x\n", - retval); - return -EIO; - } - /* Must set to disabled if mode is toggled */ if (asus->cpu_fan_curve_available) asus->custom_fan_curves[FAN_CURVE_DEV_CPU].enabled = false; From e9fba20c29e27dc99e55e1c550573a114561bf8c Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 23 Nov 2024 23:47:00 +0100 Subject: [PATCH 163/366] platform/x86: asus-nb-wmi: Ignore unknown event 0xCF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the Asus X541UAK an unknown event 0xCF is emited when the charger is plugged in. This is caused by the following AML code: If (ACPS ()) { ACPF = One Local0 = 0x58 If (ATKP) { ^^^^ATKD.IANE (0xCF) } } Else { ACPF = Zero Local0 = 0x57 } Notify (AC0, 0x80) // Status Change If (ATKP) { ^^^^ATKD.IANE (Local0) } Sleep (0x64) PNOT () Sleep (0x0A) NBAT (0x80) Ignore the 0xCF event to silence the unknown event warning. Reported-by: Pau Espin Pedrol Closes: https://lore.kernel.org/platform-driver-x86/54d4860b-ec9c-4992-acf6-db3f90388293@espeweb.net Signed-off-by: Armin Wolf Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20241123224700.18530-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index ef04d396f61c7..a5933980ade3d 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -623,6 +623,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, { KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */ + { KE_IGNORE, 0xCF, }, /* AC mode */ { KE_KEY, 0xFA, { KEY_PROG2 } }, /* Lid flip action */ { KE_KEY, 0xBD, { KEY_PROG2 } }, /* Lid flip action on ROG xflow laptops */ { KE_END, 0}, From 84909f7decbd8981a24be829f110c248ecb8c51a Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 24 Nov 2024 18:25:53 +0530 Subject: [PATCH 164/366] nvmet: use kzalloc instead of ZERO_PAGE in nvme_execute_identify_ns_nvm() The nvme_execute_identify_ns_nvm function uses ZERO_PAGE for copying SG list with all zeros. As ZERO_PAGE would not necessarily return the virtual-address of the zero page, we need to first convert the page address to kernel virtual-address and then use it as source address for copying the data to SG list with all zeros. Using return address of ZERO_PAGE(0) as source address for copying data to SG list would fill the target buffer with random/garbage value and causes the undesired side effect. As other identify implemenations uses kzalloc for allocating a zero filled buffer, we decided use kzalloc for allocating a zero filled buffer in nvme_execute_identify_ns_nvm function and then use this buffer for copying all zeros to SG list buffers. So esentially, we now avoid using ZERO_PAGE. Reported-by: Yi Zhang Fixes: 64a51080eaba ("nvmet: implement id ns for nvm command set") Link: https://lore.kernel.org/all/CAHj4cs8OVyxmn4XTvA=y4uQ3qWpdw-x3M3FSUYr-KpE-nhaFEA@mail.gmail.com/ Signed-off-by: Nilay Shroff Tested-by: Yi Zhang Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 4fa8496a4d967..2962794ce881e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -902,13 +902,18 @@ static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req) static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) { u16 status; + struct nvme_id_ns_nvm *id; status = nvmet_req_find_ns(req); if (status) goto out; - status = nvmet_copy_to_sgl(req, 0, ZERO_PAGE(0), - NVME_IDENTIFY_DATA_SIZE); + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) { + status = NVME_SC_INTERNAL; + goto out; + } + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); out: nvmet_req_complete(req, status); } From 58a0c875ce028678c9594c7bdf3fe33462392808 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 Nov 2024 07:42:18 +0100 Subject: [PATCH 165/366] nvme: don't apply NVME_QUIRK_DEALLOCATE_ZEROES when DSM is not supported Commit 63dfa1004322 ("nvme: move NVME_QUIRK_DEALLOCATE_ZEROES out of nvme_config_discard") started applying the NVME_QUIRK_DEALLOCATE_ZEROES quirk even then the Dataset Management is not supported. It turns out that there versions of these old Intel SSDs that have DSM support disabled in the firmware, which will now lead to errors everytime a Write Zeroes command is issued. Fix this by checking for DSM support before applying the quirk. Reported-by: Saeed Mirzamohammadi Fixes: 63dfa1004322 ("nvme: move NVME_QUIRK_DEALLOCATE_ZEROES out of nvme_config_discard") Tested-by: Saeed Mirzamohammadi Signed-off-by: Christoph Hellwig Reviewed-by: Nitesh Shetty Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bfd71511c85f8..5e5c9e15ad0ca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2043,7 +2043,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, lim->physical_block_size = min(phys_bs, atomic_bs); lim->io_min = phys_bs; lim->io_opt = io_opt; - if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + if ((ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) && + (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)) lim->max_write_zeroes_sectors = UINT_MAX; else lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors; From c6c2f66372d5cba5ce85eed686901259333ed816 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Nov 2024 14:00:05 -0500 Subject: [PATCH 166/366] drm/amdgpu/jpeg1.0: fix idle work handler On VCN 1.0, VCN and JPEG use the same worker thread so cancel the vcn worker rather than jpeg. On VCN 2.0 and newer there are separate workers for each. Fixes: 93df74873703 ("drm/amdgpu/jpeg: cancel the jpeg worker") Tested-by: George Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 7319299f25aea..03b8b7cd5229b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -604,7 +604,7 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); int cnt = 0; mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); From c9b8dcabb52afe88413ff135a0953e3cc4128483 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Nov 2024 11:22:51 -0500 Subject: [PATCH 167/366] drm/amdgpu/hdp4.0: do a posting read when flushing HDP Need to read back to make sure the write goes through. Cc: David Belanger Reviewed-by: Frank Min Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index e019249883fb2..194026e9be333 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -40,10 +40,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, @@ -54,11 +56,13 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) return; - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); - else + RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE); + } else { amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); + } } static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev, From cf424020e040be35df05b682b546b255e74a420f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Nov 2024 11:23:56 -0500 Subject: [PATCH 168/366] drm/amdgpu/hdp5.0: do a posting read when flushing HDP Need to read back to make sure the write goes through. Cc: David Belanger Reviewed-by: Frank Min Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index ed7facacf2fe3..d3962d4690881 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -31,10 +31,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, @@ -42,6 +44,7 @@ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE); } else { amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); From f756dbac1ce1d5f9a2b35e3b55fa429cf6336437 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Nov 2024 11:24:13 -0500 Subject: [PATCH 169/366] drm/amdgpu/hdp5.2: do a posting read when flushing HDP Need to read back to make sure the write goes through. Cc: David Belanger Reviewed-by: Frank Min Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 29c3484ae1f16..f52552c5fa27b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -31,13 +31,15 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, From abe1cbaec6cfe9fde609a15cd6a12c812282ce77 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Nov 2024 11:24:38 -0500 Subject: [PATCH 170/366] drm/amdgpu/hdp6.0: do a posting read when flushing HDP Need to read back to make sure the write goes through. Cc: David Belanger Reviewed-by: Frank Min Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index 33736d361dd0b..6948fe9956ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -34,10 +34,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, From 689275140cb8e9f8ae59e545086fce51fb0b994a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Nov 2024 16:05:24 +0800 Subject: [PATCH 171/366] drm/amdgpu/hdp7.0: do a posting read when flushing HDP Need to read back to make sure the write goes through. Cc: David Belanger Reviewed-by: Frank Min Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 1c99bb09e2a12..63820329f67eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -31,10 +31,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) + if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, From 6a1853bdf17874392476b552398df261f75503e0 Mon Sep 17 00:00:00 2001 From: Aruna Ramakrishna Date: Tue, 19 Nov 2024 17:45:19 +0000 Subject: [PATCH 172/366] x86/pkeys: Change caller of update_pkru_in_sigframe() update_pkru_in_sigframe() will shortly need some information which is only available inside xsave_to_user_sigframe(). Move update_pkru_in_sigframe() inside the other function to make it easier to provide it that information. No functional changes. Signed-off-by: Aruna Ramakrishna Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20241119174520.3987538-2-aruna.ramakrishna%40oracle.com --- arch/x86/kernel/fpu/signal.c | 20 ++------------------ arch/x86/kernel/fpu/xstate.h | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 1065ab995305c..8f62e0666dea5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -63,16 +63,6 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, return true; } -/* - * Update the value of PKRU register that was already pushed onto the signal frame. - */ -static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) -{ - if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) - return 0; - return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); -} - /* * Signal frame handlers. */ @@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) { - int err = 0; - - if (use_xsave()) { - err = xsave_to_user_sigframe(buf); - if (!err) - err = update_pkru_in_sigframe(buf, pkru); - return err; - } + if (use_xsave()) + return xsave_to_user_sigframe(buf, pkru); if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 0b86a5002c846..6b2924fbe5b8d 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -69,6 +69,16 @@ static inline u64 xfeatures_mask_independent(void) return fpu_kernel_cfg.independent_features; } +/* + * Update the value of PKRU register that was already pushed onto the signal frame. + */ +static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) +{ + if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) + return 0; + return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); +} + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 @@ -256,7 +266,7 @@ static inline u64 xfeatures_need_sigframe_write(void) * The caller has to zero buf::header before calling this because XSAVE* * does not touch the reserved fields in the header. */ -static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru) { /* * Include the features which are not xsaved/rstored by the kernel @@ -281,6 +291,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); + if (!err) + err = update_pkru_in_sigframe(buf, pkru); + return err; } From ae6012d72fa60c9ff92de5bac7a8021a47458e5b Mon Sep 17 00:00:00 2001 From: Aruna Ramakrishna Date: Tue, 19 Nov 2024 17:45:20 +0000 Subject: [PATCH 173/366] x86/pkeys: Ensure updated PKRU value is XRSTOR'd When XSTATE_BV[i] is 0, and XRSTOR attempts to restore state component 'i' it ignores any value in the XSAVE buffer and instead restores the state component's init value. This means that if XSAVE writes XSTATE_BV[PKRU]=0 then XRSTOR will ignore the value that update_pkru_in_sigframe() writes to the XSAVE buffer. XSTATE_BV[PKRU] only gets written as 0 if PKRU is in its init state. On Intel CPUs, basically never happens because the kernel usually overwrites the init value (aside: this is why we didn't notice this bug until now). But on AMD, the init tracker is more aggressive and will track PKRU as being in its init state upon any wrpkru(0x0). Unfortunately, sig_prepare_pkru() does just that: wrpkru(0x0). This writes XSTATE_BV[PKRU]=0 which makes XRSTOR ignore the PKRU value in the sigframe. To fix this, always overwrite the sigframe XSTATE_BV with a value that has XSTATE_BV[PKRU]==1. This ensures that XRSTOR will not ignore what update_pkru_in_sigframe() wrote. The problematic sequence of events is something like this: Userspace does: * wrpkru(0xffff0000) (or whatever) * Hardware sets: XINUSE[PKRU]=1 Signal happens, kernel is entered: * sig_prepare_pkru() => wrpkru(0x00000000) * Hardware sets: XINUSE[PKRU]=0 (aggressive AMD init tracker) * XSAVE writes most of XSAVE buffer, including XSTATE_BV[PKRU]=XINUSE[PKRU]=0 * update_pkru_in_sigframe() overwrites PKRU in XSAVE buffer ... signal handling * XRSTOR sees XSTATE_BV[PKRU]==0, ignores just-written value from update_pkru_in_sigframe() Fixes: 70044df250d0 ("x86/pkeys: Update PKRU to enable all pkeys before XSAVE") Suggested-by: Rudi Horn Signed-off-by: Aruna Ramakrishna Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lore.kernel.org/all/20241119174520.3987538-3-aruna.ramakrishna%40oracle.com --- arch/x86/kernel/fpu/xstate.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 6b2924fbe5b8d..aa16f1a1bbcf1 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -72,10 +72,22 @@ static inline u64 xfeatures_mask_independent(void) /* * Update the value of PKRU register that was already pushed onto the signal frame. */ -static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) +static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru) { + u64 xstate_bv; + int err; + if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) return 0; + + /* Mark PKRU as in-use so that it is restored correctly. */ + xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU; + + err = __put_user(xstate_bv, &buf->header.xfeatures); + if (err) + return err; + + /* Update PKRU value in the userspace xsave buffer. */ return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); } @@ -292,7 +304,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr clac(); if (!err) - err = update_pkru_in_sigframe(buf, pkru); + err = update_pkru_in_sigframe(buf, mask, pkru); return err; } From 0f15cbc203712ccad363611eded31a2c700f3974 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 27 Nov 2024 21:25:00 -0600 Subject: [PATCH 174/366] drm/amd: Sanity check the ACPI EDID An HP Pavilion Aero Laptop 13-be0xxx/8916 has an ACPI EDID, but using it is causing corruption. It's got illogical values of not specifying a digital interface. Sanity check the ACPI EDID to avoid tripping such problems. Suggested-by: Tobias Jakobi Reported-and-tested-by: Chris Bainbridge Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3782 Fixes: c6a837088bed ("drm/amd/display: Fetch the EDID from _DDC if available for eDP") Reviewed-by: Harry Wentland Link: https://lore.kernel.org/r/20241128032500.2088288-1-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b0fea0856866d..6cbbb71d752be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -907,14 +907,14 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); unsigned char start = block * EDID_LENGTH; - void *edid; + struct edid *edid; int r; if (!acpidev) return -ENODEV; /* fetch the entire edid from BIOS */ - r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, &edid); + r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, (void *)&edid); if (r < 0) { drm_dbg(connector->dev, "Failed to get EDID from ACPI: %d\n", r); return r; @@ -924,7 +924,14 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) goto cleanup; } - memcpy(buf, edid + start, len); + /* sanity check */ + if (edid->revision < 4 || !(edid->input & DRM_EDID_INPUT_DIGITAL) || + (edid->input & DRM_EDID_DIGITAL_TYPE_MASK) == DRM_EDID_DIGITAL_TYPE_UNDEF) { + r = -EINVAL; + goto cleanup; + } + + memcpy(buf, (void *)edid + start, len); r = 0; cleanup: From 1c0938620176f451b814e9611b5444cd272b2a65 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 27 Nov 2024 21:22:00 -0600 Subject: [PATCH 175/366] drm/amd/display: Fix programming backlight on OLED panels commit 38077562e059 ("drm/amd/display: Implement new backlight_level_params structure") adjusted DC core to require the backlight type to be programmed in the dc link when changing brightness. This isn't initialized in amdgpu_dm for OLED panels though which broke brightness. Explicitly initialize when aux support is enabled. Reported-and-tested-by: Luke Jones Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3792 Fixes: 38077562e059 ("drm/amd/display: Implement new backlight_level_params structure") Reviewed-by: Harry Wentland Link: https://lore.kernel.org/r/20241128032200.2085398-1-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f0a6816709ca7..48be917e7bc55 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3481,6 +3481,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->aux_support = false; else if (amdgpu_backlight == 1) caps->aux_support = true; + if (caps->aux_support) + aconnector->dc_link->backlight_control_type = BACKLIGHT_CONTROL_AMD_AUX; luminance_range = &conn_base->display_info.luminance_range; From 33114f1057ea5cf40e604021711a9711a060fcb6 Mon Sep 17 00:00:00 2001 From: Sreekant Somasekharan Date: Thu, 28 Nov 2024 12:05:56 -0500 Subject: [PATCH 176/366] drm/amdkfd: add MEC version that supports no PCIe atomics for GFX12 Add MEC version from which alternate support for no PCIe atomics is provided so that device is not skipped during KFD device init in GFX1200/GFX1201. Signed-off-by: Sreekant Somasekharan Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 956198da7859e..9b51dd75fefc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -235,6 +235,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd, */ kfd->device_info.needs_pci_atomics = true; kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; + } else if (gc_version < IP_VERSION(13, 0, 0)) { + kfd->device_info.needs_pci_atomics = true; + kfd->device_info.no_atomic_fw_version = 2090; } else { kfd->device_info.needs_pci_atomics = true; } From 55ed120dcfdde2478c3ebfa1c0ac4ed1e430053b Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 26 Nov 2024 15:18:47 -0500 Subject: [PATCH 177/366] drm/amdkfd: hard-code cacheline for gc943,gc944 Cacheline size is not available in IP discovery for gc943,gc944. Signed-off-by: David Yat Sin Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 723f1220e1cc9..7b826a136ceb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1510,6 +1510,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_tcp_size_per_cu) { pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; pcache_info[i].cache_level = 1; + /* Cacheline size not available in IP discovery for gc943,gc944 */ + pcache_info[i].cache_line_size = 128; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1521,6 +1523,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, pcache_info[i].cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; pcache_info[i].cache_level = 1; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1531,6 +1534,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; pcache_info[i].cache_level = 1; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1541,6 +1545,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_tcc_size) { pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; pcache_info[i].cache_level = 2; + pcache_info[i].cache_line_size = 128; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1551,6 +1556,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gmc.mall_size) { pcache_info[i].cache_size = adev->gmc.mall_size / 1024; pcache_info[i].cache_level = 3; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); From 63e7ee677c74e981257cedfdd8543510d09096ba Mon Sep 17 00:00:00 2001 From: Peterson Guo Date: Thu, 7 Nov 2024 19:20:02 -0500 Subject: [PATCH 178/366] drm/amd/display: Add a left edge pixel if in YCbCr422 or YCbCr420 and odm [WHY] On some cards when odm is used, the monitor will have 2 separate pipes split vertically. When compression is used on the YCbCr colour space on the second pipe to have correct colours, we need to read a pixel from the end of first pipe to accurately display colours. Hardware was programmed properly to account for this extra pixel but it was not calculated properly in software causing a split screen on some monitors. [HOW] The fix adjusts the second pipe's viewport and timings if the pixel encoding is YCbCr422 or YCbCr420. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: George Shen Signed-off-by: Peterson Guo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/resource/dcn20/dcn20_resource.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 189d0c85872e6..7a5b9aa5292cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1510,6 +1510,7 @@ bool dcn20_split_stream_for_odm( if (prev_odm_pipe->plane_state) { struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; + struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp; int new_width; /* HACTIVE halved for odm combine */ @@ -1543,7 +1544,28 @@ bool dcn20_split_stream_for_odm( sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( sd->ratios.horz_c, sd->h_active - sd->recout.x)); sd->recout.x = 0; + + /* + * When odm is used in YcbCr422 or 420 colour space, a split screen + * will be seen with the previous calculations since the extra left + * edge pixel is accounted for in fmt but not in viewport. + * + * Below are calculations which fix the split by fixing the calculations + * if there is an extra left edge pixel. + */ + if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count + && opp->funcs->opp_get_left_edge_extra_pixel_count( + opp, next_odm_pipe->stream->timing.pixel_encoding, + resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) { + sd->h_active += 1; + sd->recout.width += 1; + sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + } } + if (!next_odm_pipe->top_pipe) next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; else @@ -2132,6 +2154,7 @@ bool dcn20_fast_validate_bw( ASSERT(0); } } + /* Actual dsc count per stream dsc validation*/ if (!dcn20_validate_dsc(dc, context)) { context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = From 6a7fd76b949efe40fb6d6677f480e624e0cb6e40 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 14 Nov 2024 14:18:13 -0500 Subject: [PATCH 179/366] drm/amd/display: Add option to retrieve detile buffer size [WHY] For better power profiling knowing the detile buffer size at a given point in time would be useful. [HOW] Add interface to retrieve detile buffer from dc state. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Aric Cyr Signed-off-by: Sung Lee Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../gpu/drm/amd/display/dc/inc/core_types.h | 1 + .../display/dc/resource/dcn31/dcn31_resource.c | 7 +++++++ .../display/dc/resource/dcn31/dcn31_resource.h | 3 +++ .../dc/resource/dcn314/dcn314_resource.c | 1 + .../dc/resource/dcn315/dcn315_resource.c | 1 + .../dc/resource/dcn316/dcn316_resource.c | 1 + .../display/dc/resource/dcn35/dcn35_resource.c | 1 + .../dc/resource/dcn351/dcn351_resource.c | 1 + 10 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1dd26d5df6b95..49fe7dcf9372e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6109,3 +6109,21 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state profile.power_level = dc->res_pool->funcs->get_power_profile(context); return profile; } + +/* + ********************************************************************************** + * dc_get_det_buffer_size_from_state() - extracts detile buffer size from dc state + * + * Called when DM wants to log detile buffer size from dc_state + * + ********************************************************************************** + */ +unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) +{ + struct dc *dc = context->clk_mgr->ctx->dc; + + if (dc->res_pool->funcs->get_det_buffer_size) + return dc->res_pool->funcs->get_det_buffer_size(context); + else + return 0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1040519358841..49a31598bb88a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2550,6 +2550,8 @@ struct dc_power_profile { struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context); +unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); + /* DSC Interfaces */ #include "dc_dsc.h" diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 8597e866bfe6b..3061dca47dd2f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -219,6 +219,7 @@ struct resource_funcs { * Get indicator of power from a context that went through full validation */ int (*get_power_profile)(const struct dc_state *context); + unsigned int (*get_det_buffer_size)(const struct dc_state *context); }; struct audio_support{ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index c16cf1c8f7f9e..54ec3d8e920c9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1720,6 +1720,12 @@ int dcn31_populate_dml_pipes_from_context( return pipe_cnt; } +unsigned int dcn31_get_det_buffer_size( + const struct dc_state *context) +{ + return context->bw_ctx.dml.ip.det_buffer_size_kbytes; +} + void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1842,6 +1848,7 @@ static struct resource_funcs dcn31_res_pool_funcs = { .update_bw_bounding_box = dcn31_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn31_get_panel_config_defaults, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index 901436591ed45..551ad912f7bea 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -63,6 +63,9 @@ struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +unsigned int dcn31_get_det_buffer_size( + const struct dc_state *context); + /*temp: B0 specific before switch to dcn313 headers*/ #ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL #define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index c0f48c78e968f..2794473f2aff6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1777,6 +1777,7 @@ static struct resource_funcs dcn314_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn314_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 6c3295259a81e..4ee33eb3381d1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1845,6 +1845,7 @@ static struct resource_funcs dcn315_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn315_get_panel_config_defaults, .get_power_profile = dcn315_get_power_profile, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static bool dcn315_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 6edaaadcb173b..79eddbafe3c29 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1719,6 +1719,7 @@ static struct resource_funcs dcn316_res_pool_funcs = { .update_bw_bounding_box = dcn316_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn316_get_panel_config_defaults, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static bool dcn316_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 6cc2960b6104e..09a5a94749030 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1778,6 +1778,7 @@ static struct resource_funcs dcn35_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static bool dcn35_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index d87e2641cda1a..fe382f9b6ff26 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1757,6 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia, + .get_det_buffer_size = dcn31_get_det_buffer_size, }; static bool dcn351_resource_construct( From 24d3749c11d949972d8c22e75567dc90ff5482e7 Mon Sep 17 00:00:00 2001 From: Lo-an Chen Date: Thu, 14 Nov 2024 17:53:41 +0800 Subject: [PATCH 180/366] drm/amd/display: Correct prefetch calculation [WHY] The minimum value of the dst_y_prefetch_equ was not correct in prefetch calculation whice causes OPTC underflow. [HOW] Add the min operation of dst_y_prefetch_equ in prefetch calculation. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Lo-an Chen Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index d851c081e3768..8dabb1ac0b684 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1222,6 +1222,7 @@ static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st * s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal); + s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); From a29997b7ac1f5c816b543e0c56aa2b5b56baac24 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 13 Nov 2024 16:44:15 -0500 Subject: [PATCH 181/366] drm/amd/display: Limit VTotal range to max hw cap minus fp [WHY & HOW] Hardware does not support the VTotal to be between fp2 lines of the maximum possible VTotal, so add a capability flag to track it and apply where necessary. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Jun Lei Reviewed-by: Anthony Koo Signed-off-by: Dillon Varone Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../dc/dml2/dml21/dml21_translation_helper.c | 27 +++++++++++++++++-- .../dc/resource/dcn30/dcn30_resource.c | 1 + .../dc/resource/dcn302/dcn302_resource.c | 1 + .../dc/resource/dcn303/dcn303_resource.c | 1 + .../dc/resource/dcn32/dcn32_resource.c | 1 + .../dc/resource/dcn321/dcn321_resource.c | 1 + .../dc/resource/dcn35/dcn35_resource.c | 1 + .../dc/resource/dcn351/dcn351_resource.c | 1 + .../dc/resource/dcn401/dcn401_resource.c | 1 + .../amd/display/modules/freesync/freesync.c | 13 ++++++++- 11 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 49a31598bb88a..1dda5fc2e2122 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -290,6 +290,7 @@ struct dc_caps { uint16_t subvp_vertical_int_margin_us; bool seamless_odm; uint32_t max_v_total; + bool vtotal_limited_by_fp2; uint32_t max_disp_clock_khz_at_vmin; uint8_t subvp_drr_vblank_start_margin_us; bool cursor_not_scaled; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 138b4b1e42ed7..f66493528f420 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -339,11 +339,22 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in // } } +static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) +{ + unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total; + + if (stream->ctx->dc->caps.vtotal_limited_by_fp2) { + max_hw_v_total -= stream->timing.v_front_porch + 1; + } + + return max_hw_v_total; +} + static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, struct dc_stream_state *stream, struct dml2_context *dml_ctx) { - unsigned int hblank_start, vblank_start; + unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; @@ -371,11 +382,23 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf - stream->timing.v_border_top - stream->timing.v_border_bottom; timing->drr_config.enabled = stream->ignore_msa_timing_param; - timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; timing->drr_config.drr_active_variable = stream->vrr_active_variable; timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; timing->drr_config.disallowed = !stream->allow_freesync; + /* limit min refresh rate to DC cap */ + min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; + if (stream->ctx->dc->caps.max_v_total != 0) { + min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL), + (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); + } + + if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) { + timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; + } else { + timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz; + } + if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && stream->ctx->dc->config.enable_fpo_flicker_detection == 1) timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index cd31e4f16c14b..bfd0eccbed28c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2353,6 +2353,7 @@ static bool dcn30_resource_construct( dc->caps.dp_hdmi21_pcon_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 02af8b8f4d277..7baefc910a3dc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1233,6 +1233,7 @@ static bool dcn302_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 7002a8dd358a5..8a57d46ad15f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1178,6 +1178,7 @@ static bool dcn303_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 01d1a11d55455..984d23bcbc279 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2189,6 +2189,7 @@ static bool dcn32_resource_construct( dc->caps.dmcub_support = true; dc->caps.seamless_odm = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 5cb74fd9cb7d2..06b9479c8bd3a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1742,6 +1742,7 @@ static bool dcn321_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 09a5a94749030..89e2adcf2a285 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1850,6 +1850,7 @@ static bool dcn35_resource_construct( dc->caps.zstate_support = true; dc->caps.ips_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index fe382f9b6ff26..263a37c1cd3ae 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1829,6 +1829,7 @@ static bool dcn351_resource_construct( dc->caps.zstate_support = true; dc->caps.ips_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index db93bac247c0f..2a3dabfe3ceac 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1864,6 +1864,7 @@ static bool dcn401_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index f980a84dceefc..2b3964529539f 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -122,6 +122,17 @@ static unsigned int calc_duration_in_us_from_v_total( return duration_in_us; } +static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) +{ + unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total; + + if (stream->ctx->dc->caps.vtotal_limited_by_fp2) { + max_hw_v_total -= stream->timing.v_front_porch + 1; + } + + return max_hw_v_total; +} + unsigned int mod_freesync_calc_v_total_from_refresh( const struct dc_stream_state *stream, unsigned int refresh_in_uhz) @@ -1016,7 +1027,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) { min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL), - (stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total)); + (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); } /* Limit minimum refresh rate to what can be supported by hardware */ min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ? From 0c0a19430bfdfedab437e77b9262e8e62ced384e Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 15 Nov 2024 15:44:52 -0500 Subject: [PATCH 182/366] drm/amd/display: Add hblank borrowing support [WHY] Some DSC timing failed at bandwidth validation due to hactive can't be evenly divided on each ODM segment. [HOW] Borrow from hblank to increase hactive to support these timing. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Signed-off-by: Chris Park Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 42 ++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../gpu/drm/amd/display/dc/dc_spl_translate.c | 2 +- .../dc/dml2/dml21/dml21_translation_helper.c | 21 +++++++++- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 3 +- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 7 +++- .../gpu/drm/amd/display/dc/inc/core_types.h | 2 + .../gpu/drm/amd/display/dc/link/link_dpms.c | 3 +- .../dc/resource/dcn32/dcn32_resource.c | 1 + 9 files changed, 75 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 619fad17de554..626f75b6ad003 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2094,7 +2094,8 @@ int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master, count = resource_get_odm_slice_count(otg_master); h_active = timing->h_addressable + timing->h_border_left + - timing->h_border_right; + timing->h_border_right + + otg_master->hblank_borrow; width = h_active / count; if (otg_master->stream_res.tg) @@ -4026,6 +4027,41 @@ enum dc_status dc_validate_with_context(struct dc *dc, return res; } +/** + * decide_hblank_borrow - Decides the horizontal blanking borrow value for a given pipe context. + * @pipe_ctx: Pointer to the pipe context structure. + * + * This function calculates the horizontal blanking borrow value for a given pipe context based on the + * display stream compression (DSC) configuration. If the horizontal active pixels (hactive) are less + * than the total width of the DSC slices, it sets the hblank_borrow value to the difference. If the + * total horizontal timing minus the hblank_borrow value is less than 32, it resets the hblank_borrow + * value to 0. + */ +static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx) +{ + uint32_t hactive; + uint32_t ceil_slice_width; + struct dc_stream_state *stream = NULL; + + if (!pipe_ctx) + return; + + stream = pipe_ctx->stream; + + if (stream->timing.flags.DSC) { + hactive = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + + /* Assume if determined slices does not divide Hactive evenly, Hborrow is needed for padding*/ + if (hactive % stream->timing.dsc_cfg.num_slices_h != 0) { + ceil_slice_width = (hactive / stream->timing.dsc_cfg.num_slices_h) + 1; + pipe_ctx->hblank_borrow = ceil_slice_width * stream->timing.dsc_cfg.num_slices_h - hactive; + + if (stream->timing.h_total - hactive - pipe_ctx->hblank_borrow < 32) + pipe_ctx->hblank_borrow = 0; + } + } +} + /** * dc_validate_global_state() - Determine if hardware can support a given state * @@ -4064,6 +4100,10 @@ enum dc_status dc_validate_global_state( if (pipe_ctx->stream != stream) continue; + /* Decide whether hblank borrow is needed and save it in pipe_ctx */ + if (dc->debug.enable_hblank_borrow) + decide_hblank_borrow(pipe_ctx); + if (dc->res_pool->funcs->patch_unknown_plane_state && pipe_ctx->plane_state && pipe_ctx->plane_state->tiling_info.gfx9.swizzle == DC_SW_UNKNOWN) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1dda5fc2e2122..e9b9126c04017 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1069,6 +1069,7 @@ struct dc_debug_options { unsigned int scale_to_sharpness_policy; bool skip_full_updated_if_possible; unsigned int enable_oled_edp_power_up_opt; + bool enable_hblank_borrow; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index c8d8e335fa37a..0e310fd48b5c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -120,7 +120,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx); // Make spl input basic out info output_size width point to stream h active spl_in->basic_out.output_size.width = - stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->hblank_borrow; // Make spl input basic out info output_size height point to v active spl_in->basic_out.output_size.height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index f66493528f420..c6a5a86146797 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -445,6 +445,21 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf timing->vblank_nom = timing->v_total - timing->v_active; } +/** + * adjust_dml21_hblank_timing_config_from_pipe_ctx - Adjusts the horizontal blanking timing configuration + * based on the pipe context. + * @timing: Pointer to the dml2_timing_cfg structure to be adjusted. + * @pipe: Pointer to the pipe_ctx structure containing the horizontal blanking borrow value. + * + * This function modifies the horizontal active and blank end timings by adding and subtracting + * the horizontal blanking borrow value from the pipe context, respectively. + */ +static void adjust_dml21_hblank_timing_config_from_pipe_ctx(struct dml2_timing_cfg *timing, struct pipe_ctx *pipe) +{ + timing->h_active += pipe->hblank_borrow; + timing->h_blank_end -= pipe->hblank_borrow; +} + static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, struct dc_stream_state *stream, const struct pipe_ctx *pipe) { @@ -732,6 +747,7 @@ static const struct scaler_data *get_scaler_data_for_plane( temp_pipe->plane_state = pipe->plane_state; temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; temp_pipe->stream_res = pipe->stream_res; + temp_pipe->hblank_borrow = pipe->hblank_borrow; dml_ctx->config.callbacks.build_scaling_params(temp_pipe); break; } @@ -996,6 +1012,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); + adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]); @@ -1134,12 +1151,12 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; - hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right; + hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right + pipe_ctx->hblank_borrow; vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; - hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right; + hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right - pipe_ctx->hblank_borrow; vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index d7f8b2dcaa6b4..fa11f075d1f9a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1049,7 +1049,8 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; dsc_cfg.color_depth = stream->timing.display_color_depth; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5de11e2837c01..307782592789c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -820,6 +820,7 @@ enum dc_status dcn401_enable_stream_timing( int opp_cnt = 1; int opp_inst[MAX_PIPES] = {0}; struct pipe_ctx *opp_heads[MAX_PIPES] = {0}; + struct dc_crtc_timing patched_crtc_timing = stream->timing; bool manual_mode; unsigned int tmds_div = PIXEL_RATE_DIV_NA; unsigned int unused_div = PIXEL_RATE_DIV_NA; @@ -874,9 +875,13 @@ enum dc_status dcn401_enable_stream_timing( if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal))) dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx); + /* if we are borrowing from hblank, h_addressable needs to be adjusted */ + if (dc->debug.enable_hblank_borrow) + patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; + pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, - &stream->timing, + &patched_crtc_timing, pipe_ctx->pipe_dlg_param.vready_offset, pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 3061dca47dd2f..2edd5b38ce4f8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -478,6 +478,8 @@ struct pipe_ctx { /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/ uint8_t subvp_index; struct pixel_rate_divider pixel_rate_divider; + /* pixels borrowed from hblank to hactive */ + uint8_t hblank_borrow; }; /* Data used for dynamic link encoder assignment. diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 41cab9ad6885a..5d66bfc7fe6e6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -808,7 +808,8 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) enum optc_dsc_mode optc_dsc_mode; /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; dsc_cfg.color_depth = stream->timing.display_color_depth; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 984d23bcbc279..12d247a7ec454 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2804,6 +2804,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx]; free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx]; free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst; + free_pipe->hblank_borrow = otg_master->hblank_borrow; if (free_pipe->stream->timing.flags.DSC == 1) { dcn20_acquire_dsc(free_pipe->stream->ctx->dc, &new_ctx->res_ctx, From 274e3f4596446955bf17680fd4eb5489f5ecac00 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Fri, 29 Nov 2024 14:52:08 -0500 Subject: [PATCH 183/366] drm/amdgpu: Fix ISP hw init issue ISP hw_init is not called with the recent changes related to hw init levels. AMDGPU_INIT_LEVEL_DEFAULT is ignoring the ISP IP block as AMDGPU_IP_BLK_MASK_ALL is derived using incorrect max number of IP blocks. Update AMDGPU_IP_BLK_MASK_ALL to use AMDGPU_MAX_IP_NUM instead of (AMDGPU_MAX_IP_NUM - 1) to fix the issue. Reviewed-by: Mario Limonciello Fixes: 14f2fe34f5c6 ("drm/amdgpu: Add init levels") Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9095c05e0269f..c3ca217b8d86b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = { "LAST", }; -#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0) +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) /* * Default init level where all blocks are expected to be initialized. This is * the level of initialization expected by default and also after a full reset From f3bb57b66dc439dd129eb509a4965f1e1aeea2b8 Mon Sep 17 00:00:00 2001 From: Yiqing Yao Date: Tue, 26 Nov 2024 18:36:11 +0800 Subject: [PATCH 184/366] drm/amdgpu: fix sriov reinit late orders Use found block to call correct init/resume function on the block. Set status.hw for resume and init. Print re-init result again. Change to use dev_info. Use amdgpu_device_ip_get_ip_block to get target block instead of loop. Fixes: 502d76308d45 ("drm/amdgpu: validate resume before function call") Signed-off-by: Yiqing Yao Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 43 ++++++++++------------ 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c3ca217b8d86b..913eb4f08ee6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3670,9 +3670,11 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) continue; r = block->version->funcs->hw_init(&adev->ip_blocks[i]); - DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); - if (r) + if (r) { + dev_err(adev->dev, "RE-INIT-early: %s failed\n", + block->version->funcs->name); return r; + } block->status.hw = true; } } @@ -3682,7 +3684,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) { - int i, r; + struct amdgpu_ip_block *block; + int i, r = 0; static enum amd_ip_block_type ip_order[] = { AMD_IP_BLOCK_TYPE_SMC, @@ -3697,34 +3700,28 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) }; for (i = 0; i < ARRAY_SIZE(ip_order); i++) { - int j; - struct amdgpu_ip_block *block; - - for (j = 0; j < adev->num_ip_blocks; j++) { - block = &adev->ip_blocks[j]; + block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]); - if (block->version->type != ip_order[i] || - !block->status.valid || - block->status.hw) - continue; + if (!block) + continue; + if (block->status.valid && !block->status.hw) { if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) { - r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); - if (r) - return r; + r = amdgpu_ip_block_resume(block); } else { - r = block->version->funcs->hw_init(&adev->ip_blocks[i]); - if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - block->status.hw = true; + r = block->version->funcs->hw_init(block); + } + + if (r) { + dev_err(adev->dev, "RE-INIT-late: %s failed\n", + block->version->funcs->name); + break; } + block->status.hw = true; } } - return 0; + return r; } /** From c3d06a3b6acd6b8c9595d677d049555f475703df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 16 Nov 2024 09:22:14 -0500 Subject: [PATCH 185/366] Revert "drm/amd/pm: correct the workload setting" This reverts commit 74e1006430a5377228e49310f6d915628609929e. This causes a regression in the workload selection. A more extensive fix is being worked on. For now, revert. This came back after a merge in 6.13-rc1, so revert again. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3618 Fixes: 74e1006430a5 ("drm/amd/pm: correct the workload setting") Signed-off-by: Alex Deucher (cherry picked from commit 44f392fbf628a7ff2d8bb8e83ca1851261f81a6f) --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 49 ++++++------------- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 20 ++------ .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 5 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 ++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 - 12 files changed, 36 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b8355293518f8..563824f42b1d1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1261,33 +1261,26 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - smu->user_dpm_profile.user_workload_mask = 0; atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; - smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; - smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; - smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; - smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; - smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; + smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; + smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { - smu->driver_workload_mask = - 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; - } else { - smu->driver_workload_mask = - 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - } + !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + else + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; - smu->workload_mask = smu->driver_workload_mask | - smu->user_dpm_profile.user_workload_mask; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; @@ -2366,20 +2359,17 @@ static int smu_switch_power_profile(void *handle, return -EINVAL; if (!en) { - smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); + smu->workload_mask &= ~(1 << smu->workload_prority[type]); index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } else { - smu->driver_workload_mask |= (1 << smu->workload_priority[type]); + smu->workload_mask |= (1 << smu->workload_prority[type]); index = fls(smu->workload_mask); index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } - smu->workload_mask = smu->driver_workload_mask | - smu->user_dpm_profile.user_workload_mask; - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); @@ -3074,23 +3064,12 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; - int ret; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !smu->ppt_funcs->set_power_profile_mode) return -EOPNOTSUPP; - if (smu->user_dpm_profile.user_workload_mask & - (1 << smu->workload_priority[param[param_size]])) - return 0; - - smu->user_dpm_profile.user_workload_mask = - (1 << smu->workload_priority[param[param_size]]); - smu->workload_mask = smu->user_dpm_profile.user_workload_mask | - smu->driver_workload_mask; - ret = smu_bump_power_profile_mode(smu, param, param_size); - - return ret; + return smu_bump_power_profile_mode(smu, param, param_size); } static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index d665c47f19b77..4ebcc1e53ea2f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -240,7 +240,6 @@ struct smu_user_dpm_profile { /* user clock state information */ uint32_t clk_mask[SMU_CLK_COUNT]; uint32_t clk_dependency; - uint32_t user_workload_mask; }; #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ @@ -558,8 +557,7 @@ struct smu_context { bool disable_uclk_switch; uint32_t workload_mask; - uint32_t driver_workload_mask; - uint32_t workload_priority[WORKLOAD_POLICY_MAX]; + uint32_t workload_prority[WORKLOAD_POLICY_MAX]; uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 12125303bb799..a15754b1989f4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1459,6 +1459,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, return -EINVAL; } + if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && (smu->smc_fw_version >= 0x360d00)) { if (size != 10) @@ -1526,14 +1527,14 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, + 1 << workload_type, NULL); if (ret) { dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu_cmn_assign_power_profile(smu); + smu->power_profile_mode = profile_mode; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 211635dabed85..faa8e7d9c3c62 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2083,13 +2083,10 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u smu->power_profile_mode); if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, NULL); + 1 << workload_type, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); - else - smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d0ed0d060a8a3..a9cb28ce21337 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1788,13 +1788,10 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * smu->power_profile_mode); if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, NULL); + 1 << workload_type, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); - else - smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index f89c487dce723..cd3e9ba3eff44 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1081,7 +1081,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - smu->workload_mask, + 1 << workload_type, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", @@ -1089,7 +1089,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, return ret; } - smu_cmn_assign_power_profile(smu); + smu->power_profile_mode = profile_mode; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 75a9ea87f419a..a34797f3576bf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -892,14 +892,14 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - smu->workload_mask, + 1 << workload_type, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu_cmn_assign_power_profile(smu); + smu->power_profile_mode = profile_mode; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 80c6b1e523aae..199bdd9720d36 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2579,7 +2579,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask; + u32 workload_mask, selected_workload_mask; smu->power_profile_mode = input[size]; @@ -2646,7 +2646,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - workload_mask = 1 << workload_type; + selected_workload_mask = workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2661,22 +2661,12 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask |= 1 << workload_type; } - smu->workload_mask |= workload_mask; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, + workload_mask, NULL); - if (!ret) { - smu_cmn_assign_power_profile(smu); - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_FULLSCREEN3D); - smu->power_profile_mode = smu->workload_mask & (1 << workload_type) - ? PP_SMC_POWER_PROFILE_FULLSCREEN3D - : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - } - } + if (!ret) + smu->workload_mask = selected_workload_mask; return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 4fd0354bd312f..34c1e0c7e1e49 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2595,14 +2595,13 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp smu->power_profile_mode); if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, NULL); + 1 << workload_type, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); else - smu_cmn_assign_power_profile(smu); + smu->workload_mask = (1 << workload_type); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 687a0f5ac94f5..98e01a06add82 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1817,11 +1817,12 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - smu->workload_mask, NULL); - + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetWorkloadMask, + 1 << workload_type, + NULL); if (!ret) - smu_cmn_assign_power_profile(smu); + smu->workload_mask = 1 << workload_type; return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index dbbd3759bff37..0bc6fb7d7223a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1144,14 +1144,6 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } -void smu_cmn_assign_power_profile(struct smu_context *smu) -{ - uint32_t index; - index = fls(smu->workload_mask); - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; - smu->power_profile_mode = smu->workload_setting[index]; -} - bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) { struct pci_dev *p = NULL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 8a801e389659d..1de685defe85b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); -void smu_cmn_assign_power_profile(struct smu_context *smu); - /* * Helper function to make sysfs_emit_at() happy. Align buf to * the current page boundary and record the offset. From 1443dd3c67f6d1a8bd1f810e598e2f0c6f19205c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 16 Nov 2024 08:20:59 -0500 Subject: [PATCH 186/366] drm/amd/pm: fix and simplify workload handling smu->workload_mask is IP specific and should not be messed with in the common code. The mask bits vary across SMU versions. Move all handling of smu->workload_mask in to the backends and simplify the code. Store the user's preference in smu->power_profile_mode which will be reflected in sysfs. For internal driver profile switches for KFD or VCN, just update the workload mask so that the user's preference is retained. Remove all of the extra now unused workload related elements in the smu structure. v2: use refcounts for workload profiles v3: rework based on feedback from Lijo v4: fix the refcount on failure, drop backend mask v5: rework custom handling v6: handle failure cleanup with custom profile v7: Update documentation Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: Kenneth Feng Cc: Lijo Lazar Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 150 ++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 15 +- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 166 +++++++++-------- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 167 ++++++++++------- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 168 +++++++++++------- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 41 ++--- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 43 ++--- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 167 +++++++++-------- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 138 ++++++++------ .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 168 +++++++++++------- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 25 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 4 + 13 files changed, 741 insertions(+), 517 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 136e8193867c3..e8ae7681bf0a3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1361,7 +1361,11 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, * create a custom set of heuristics, write a string of numbers to the file * starting with the number of the custom profile along with a setting * for each heuristic parameter. Due to differences across asic families - * the heuristic parameters vary from family to family. + * the heuristic parameters vary from family to family. Additionally, + * you can apply the custom heuristics to different clock domains. Each + * clock domain is considered a distinct operation so if you modify the + * gfxclk heuristics and then the memclk heuristics, the all of the + * custom heuristics will be retained until you switch to another profile. * */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 563824f42b1d1..5eae14fe79f17 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -72,6 +72,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit); static int smu_set_fan_speed_rpm(void *handle, uint32_t speed); static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled); static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state); +static void smu_power_profile_mode_get(struct smu_context *smu, + enum PP_SMC_POWER_PROFILE profile_mode); +static void smu_power_profile_mode_put(struct smu_context *smu, + enum PP_SMC_POWER_PROFILE profile_mode); static int smu_sys_get_pp_feature_mask(void *handle, char *buf) @@ -1259,35 +1263,19 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn); atomic64_set(&smu->throttle_int_counter, 0); smu->watermarks_bitmap = 0; - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; - smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; - smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; - smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; - smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; - smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; - if (smu->is_apu || !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; else - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; - - smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; - smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO; - smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR; - smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE; - smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + smu_power_profile_mode_get(smu, smu->power_profile_mode); + smu->display_config = &adev->pm.pm_display_cfg; smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; @@ -1340,6 +1328,11 @@ static int smu_sw_fini(struct amdgpu_ip_block *ip_block) return ret; } + if (smu->custom_profile_params) { + kfree(smu->custom_profile_params); + smu->custom_profile_params = NULL; + } + smu_fini_microcode(smu); return 0; @@ -2124,6 +2117,9 @@ static int smu_suspend(struct amdgpu_ip_block *ip_block) if (!ret) adev->gfx.gfx_off_entrycount = count; + /* clear this on suspend so it will get reprogrammed on resume */ + smu->workload_mask = 0; + return 0; } @@ -2236,25 +2232,49 @@ static int smu_enable_umd_pstate(void *handle, } static int smu_bump_power_profile_mode(struct smu_context *smu, - long *param, - uint32_t param_size) + long *custom_params, + u32 custom_params_max_idx) { - int ret = 0; + u32 workload_mask = 0; + int i, ret = 0; + + for (i = 0; i < PP_SMC_POWER_PROFILE_COUNT; i++) { + if (smu->workload_refcount[i]) + workload_mask |= 1 << i; + } + + if (smu->workload_mask == workload_mask) + return 0; if (smu->ppt_funcs->set_power_profile_mode) - ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size); + ret = smu->ppt_funcs->set_power_profile_mode(smu, workload_mask, + custom_params, + custom_params_max_idx); + + if (!ret) + smu->workload_mask = workload_mask; return ret; } +static void smu_power_profile_mode_get(struct smu_context *smu, + enum PP_SMC_POWER_PROFILE profile_mode) +{ + smu->workload_refcount[profile_mode]++; +} + +static void smu_power_profile_mode_put(struct smu_context *smu, + enum PP_SMC_POWER_PROFILE profile_mode) +{ + if (smu->workload_refcount[profile_mode]) + smu->workload_refcount[profile_mode]--; +} + static int smu_adjust_power_state_dynamic(struct smu_context *smu, enum amd_dpm_forced_level level, - bool skip_display_settings, - bool init) + bool skip_display_settings) { int ret = 0; - int index = 0; - long workload[1]; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); if (!skip_display_settings) { @@ -2291,14 +2311,8 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, } if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { - index = fls(smu->workload_mask); - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; - workload[0] = smu->workload_setting[index]; - - if (init || smu->power_profile_mode != workload[0]) - smu_bump_power_profile_mode(smu, workload, 0); - } + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) + smu_bump_power_profile_mode(smu, NULL, 0); return ret; } @@ -2317,13 +2331,13 @@ static int smu_handle_task(struct smu_context *smu, ret = smu_pre_display_config_changed(smu); if (ret) return ret; - ret = smu_adjust_power_state_dynamic(smu, level, false, false); + ret = smu_adjust_power_state_dynamic(smu, level, false); break; case AMD_PP_TASK_COMPLETE_INIT: - ret = smu_adjust_power_state_dynamic(smu, level, true, true); + ret = smu_adjust_power_state_dynamic(smu, level, true); break; case AMD_PP_TASK_READJUST_POWER_STATE: - ret = smu_adjust_power_state_dynamic(smu, level, true, false); + ret = smu_adjust_power_state_dynamic(smu, level, true); break; default: break; @@ -2345,12 +2359,11 @@ static int smu_handle_dpm_task(void *handle, static int smu_switch_power_profile(void *handle, enum PP_SMC_POWER_PROFILE type, - bool en) + bool enable) { struct smu_context *smu = handle; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - long workload[1]; - uint32_t index; + int ret; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; @@ -2358,21 +2371,21 @@ static int smu_switch_power_profile(void *handle, if (!(type < PP_SMC_POWER_PROFILE_CUSTOM)) return -EINVAL; - if (!en) { - smu->workload_mask &= ~(1 << smu->workload_prority[type]); - index = fls(smu->workload_mask); - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; - workload[0] = smu->workload_setting[index]; - } else { - smu->workload_mask |= (1 << smu->workload_prority[type]); - index = fls(smu->workload_mask); - index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; - workload[0] = smu->workload_setting[index]; - } - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) - smu_bump_power_profile_mode(smu, workload, 0); + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + if (enable) + smu_power_profile_mode_get(smu, type); + else + smu_power_profile_mode_put(smu, type); + ret = smu_bump_power_profile_mode(smu, NULL, 0); + if (ret) { + if (enable) + smu_power_profile_mode_put(smu, type); + else + smu_power_profile_mode_get(smu, type); + return ret; + } + } return 0; } @@ -3064,12 +3077,35 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; + bool custom = false; + int ret = 0; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !smu->ppt_funcs->set_power_profile_mode) return -EOPNOTSUPP; - return smu_bump_power_profile_mode(smu, param, param_size); + if (param[param_size] == PP_SMC_POWER_PROFILE_CUSTOM) { + custom = true; + /* clear frontend mask so custom changes propogate */ + smu->workload_mask = 0; + } + + if ((param[param_size] != smu->power_profile_mode) || custom) { + /* clear the old user preference */ + smu_power_profile_mode_put(smu, smu->power_profile_mode); + /* set the new user preference */ + smu_power_profile_mode_get(smu, param[param_size]); + ret = smu_bump_power_profile_mode(smu, + custom ? param : NULL, + custom ? param_size : 0); + if (ret) + smu_power_profile_mode_put(smu, param[param_size]); + else + /* store the user's preference */ + smu->power_profile_mode = param[param_size]; + } + + return ret; } static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 4ebcc1e53ea2f..3925815358ce9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -556,11 +556,13 @@ struct smu_context { uint32_t hard_min_uclk_req_from_dal; bool disable_uclk_switch; + /* asic agnostic workload mask */ uint32_t workload_mask; - uint32_t workload_prority[WORKLOAD_POLICY_MAX]; - uint32_t workload_setting[WORKLOAD_POLICY_MAX]; + /* default/user workload preference */ uint32_t power_profile_mode; - uint32_t default_power_profile_mode; + uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT]; + /* backend specific custom workload settings */ + long *custom_profile_params; bool pm_enabled; bool is_apu; @@ -731,9 +733,12 @@ struct pptable_funcs { * @set_power_profile_mode: Set a power profile mode. Also used to * create/set custom power profile modes. * &input: Power profile mode parameters. - * &size: Size of &input. + * &workload_mask: mask of workloads to enable + * &custom_params: custom profile parameters + * &custom_params_max_idx: max valid idx into custom_params */ - int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); + int (*set_power_profile_mode)(struct smu_context *smu, u32 workload_mask, + long *custom_params, u32 custom_params_max_idx); /** * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index a15754b1989f4..8aa61a9f77782 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1445,98 +1445,120 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, return size; } -static int arcturus_set_power_profile_mode(struct smu_context *smu, - long *input, - uint32_t size) +#define ARCTURUS_CUSTOM_PARAMS_COUNT 10 +#define ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT 2 +#define ARCTURUS_CUSTOM_PARAMS_SIZE (ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT * ARCTURUS_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int arcturus_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffInt_t activity_monitor; - int workload_type = 0; - uint32_t profile_mode = input[size]; - int ret = 0; + int ret, idx; - if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; } + idx = 0 * ARCTURUS_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor.Gfx_FPS = input[idx + 1]; + activity_monitor.Gfx_UseRlcBusy = input[idx + 2]; + activity_monitor.Gfx_MinActiveFreqType = input[idx + 3]; + activity_monitor.Gfx_MinActiveFreq = input[idx + 4]; + activity_monitor.Gfx_BoosterFreqType = input[idx + 5]; + activity_monitor.Gfx_BoosterFreq = input[idx + 6]; + activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7]; + activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8]; + activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9]; + } + idx = 1 * ARCTURUS_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Uclk */ + activity_monitor.Mem_FPS = input[idx + 1]; + activity_monitor.Mem_UseRlcBusy = input[idx + 2]; + activity_monitor.Mem_MinActiveFreqType = input[idx + 3]; + activity_monitor.Mem_MinActiveFreq = input[idx + 4]; + activity_monitor.Mem_BoosterFreqType = input[idx + 5]; + activity_monitor.Mem_BoosterFreq = input[idx + 6]; + activity_monitor.Mem_PD_Data_limit_c = input[idx + 7]; + activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8]; + activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9]; + } - if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && - (smu->smc_fw_version >= 0x360d00)) { - if (size != 10) - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } + return ret; +} - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor.Gfx_FPS = input[1]; - activity_monitor.Gfx_UseRlcBusy = input[2]; - activity_monitor.Gfx_MinActiveFreqType = input[3]; - activity_monitor.Gfx_MinActiveFreq = input[4]; - activity_monitor.Gfx_BoosterFreqType = input[5]; - activity_monitor.Gfx_BoosterFreq = input[6]; - activity_monitor.Gfx_PD_Data_limit_c = input[7]; - activity_monitor.Gfx_PD_Data_error_coeff = input[8]; - activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9]; - break; - case 1: /* Uclk */ - activity_monitor.Mem_FPS = input[1]; - activity_monitor.Mem_UseRlcBusy = input[2]; - activity_monitor.Mem_MinActiveFreqType = input[3]; - activity_monitor.Mem_MinActiveFreq = input[4]; - activity_monitor.Mem_BoosterFreqType = input[5]; - activity_monitor.Mem_BoosterFreq = input[6]; - activity_monitor.Mem_PD_Data_limit_c = input[7]; - activity_monitor.Mem_PD_Data_error_coeff = input[8]; - activity_monitor.Mem_PD_Data_error_rate_coeff = input[9]; - break; - default: +static int arcturus_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int ret, idx = -1, i; + + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); + + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (smu->smc_fw_version < 0x360d00) return -EINVAL; + if (!smu->custom_profile_params) { + smu->custom_profile_params = + kzalloc(ARCTURUS_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; } - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor), - true); + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != ARCTURUS_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT) + return -EINVAL; + idx = custom_params[0] * ARCTURUS_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = arcturus_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + if (idx != -1) + smu->custom_profile_params[idx] = 0; return ret; } - } - - /* - * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT - * Not all profile modes are supported on arcturus. - */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - profile_mode); - if (workload_type < 0) { - dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode); - return -EINVAL; + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, ARCTURUS_CUSTOM_PARAMS_SIZE); } ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - 1 << workload_type, - NULL); + SMU_MSG_SetWorkloadMask, + backend_workload_mask, + NULL); if (ret) { - dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; return ret; } - smu->power_profile_mode = profile_mode; - - return 0; + return ret; } static int arcturus_set_performance_level(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index faa8e7d9c3c62..7fad5dfb39c44 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2006,87 +2006,122 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) return size; } -static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +#define NAVI10_CUSTOM_PARAMS_COUNT 10 +#define NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT 3 +#define NAVI10_CUSTOM_PARAMS_SIZE (NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT * NAVI10_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int navi10_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffInt_t activity_monitor; - int workload_type, ret = 0; + int ret, idx; - smu->power_profile_mode = input[size]; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor), false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; + idx = 0 * NAVI10_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor.Gfx_FPS = input[idx + 1]; + activity_monitor.Gfx_MinFreqStep = input[idx + 2]; + activity_monitor.Gfx_MinActiveFreqType = input[idx + 3]; + activity_monitor.Gfx_MinActiveFreq = input[idx + 4]; + activity_monitor.Gfx_BoosterFreqType = input[idx + 5]; + activity_monitor.Gfx_BoosterFreq = input[idx + 6]; + activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7]; + activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8]; + activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9]; + } + idx = 1 * NAVI10_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Socclk */ + activity_monitor.Soc_FPS = input[idx + 1]; + activity_monitor.Soc_MinFreqStep = input[idx + 2]; + activity_monitor.Soc_MinActiveFreqType = input[idx + 3]; + activity_monitor.Soc_MinActiveFreq = input[idx + 4]; + activity_monitor.Soc_BoosterFreqType = input[idx + 5]; + activity_monitor.Soc_BoosterFreq = input[idx + 6]; + activity_monitor.Soc_PD_Data_limit_c = input[idx + 7]; + activity_monitor.Soc_PD_Data_error_coeff = input[idx + 8]; + activity_monitor.Soc_PD_Data_error_rate_coeff = input[idx + 9]; + } + idx = 2 * NAVI10_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Memclk */ + activity_monitor.Mem_FPS = input[idx + 1]; + activity_monitor.Mem_MinFreqStep = input[idx + 2]; + activity_monitor.Mem_MinActiveFreqType = input[idx + 3]; + activity_monitor.Mem_MinActiveFreq = input[idx + 4]; + activity_monitor.Mem_BoosterFreqType = input[idx + 5]; + activity_monitor.Mem_BoosterFreq = input[idx + 6]; + activity_monitor.Mem_PD_Data_limit_c = input[idx + 7]; + activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8]; + activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9]; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor), true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size != 10) - return -EINVAL; + return ret; +} - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor), false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } +static int navi10_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int ret, idx = -1, i; - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor.Gfx_FPS = input[1]; - activity_monitor.Gfx_MinFreqStep = input[2]; - activity_monitor.Gfx_MinActiveFreqType = input[3]; - activity_monitor.Gfx_MinActiveFreq = input[4]; - activity_monitor.Gfx_BoosterFreqType = input[5]; - activity_monitor.Gfx_BoosterFreq = input[6]; - activity_monitor.Gfx_PD_Data_limit_c = input[7]; - activity_monitor.Gfx_PD_Data_error_coeff = input[8]; - activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9]; - break; - case 1: /* Socclk */ - activity_monitor.Soc_FPS = input[1]; - activity_monitor.Soc_MinFreqStep = input[2]; - activity_monitor.Soc_MinActiveFreqType = input[3]; - activity_monitor.Soc_MinActiveFreq = input[4]; - activity_monitor.Soc_BoosterFreqType = input[5]; - activity_monitor.Soc_BoosterFreq = input[6]; - activity_monitor.Soc_PD_Data_limit_c = input[7]; - activity_monitor.Soc_PD_Data_error_coeff = input[8]; - activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; - break; - case 2: /* Memclk */ - activity_monitor.Mem_FPS = input[1]; - activity_monitor.Mem_MinFreqStep = input[2]; - activity_monitor.Mem_MinActiveFreqType = input[3]; - activity_monitor.Mem_MinActiveFreq = input[4]; - activity_monitor.Mem_BoosterFreqType = input[5]; - activity_monitor.Mem_BoosterFreq = input[6]; - activity_monitor.Mem_PD_Data_limit_c = input[7]; - activity_monitor.Mem_PD_Data_error_coeff = input[8]; - activity_monitor.Mem_PD_Data_error_rate_coeff = input[9]; - break; - default: - return -EINVAL; - } + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor), true); + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (!smu->custom_profile_params) { + smu->custom_profile_params = kzalloc(NAVI10_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; + } + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != NAVI10_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT) + return -EINVAL; + idx = custom_params[0] * NAVI10_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = navi10_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + if (idx != -1) + smu->custom_profile_params[idx] = 0; return ret; } + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, NAVI10_CUSTOM_PARAMS_SIZE); } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - smu->power_profile_mode); - if (workload_type < 0) - return -EINVAL; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); - if (ret) - dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + backend_workload_mask, NULL); + if (ret) { + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a9cb28ce21337..286777ada1dfc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1708,90 +1708,126 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * return size; } -static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +#define SIENNA_CICHLID_CUSTOM_PARAMS_COUNT 10 +#define SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT 3 +#define SIENNA_CICHLID_CUSTOM_PARAMS_SIZE (SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int sienna_cichlid_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); - int workload_type, ret = 0; + int ret, idx; - smu->power_profile_mode = input[size]; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; + idx = 0 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor->Gfx_FPS = input[idx + 1]; + activity_monitor->Gfx_MinFreqStep = input[idx + 2]; + activity_monitor->Gfx_MinActiveFreqType = input[idx + 3]; + activity_monitor->Gfx_MinActiveFreq = input[idx + 4]; + activity_monitor->Gfx_BoosterFreqType = input[idx + 5]; + activity_monitor->Gfx_BoosterFreq = input[idx + 6]; + activity_monitor->Gfx_PD_Data_limit_c = input[idx + 7]; + activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 8]; + activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 9]; + } + idx = 1 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Socclk */ + activity_monitor->Fclk_FPS = input[idx + 1]; + activity_monitor->Fclk_MinFreqStep = input[idx + 2]; + activity_monitor->Fclk_MinActiveFreqType = input[idx + 3]; + activity_monitor->Fclk_MinActiveFreq = input[idx + 4]; + activity_monitor->Fclk_BoosterFreqType = input[idx + 5]; + activity_monitor->Fclk_BoosterFreq = input[idx + 6]; + activity_monitor->Fclk_PD_Data_limit_c = input[idx + 7]; + activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 8]; + activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 9]; + } + idx = 2 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Memclk */ + activity_monitor->Mem_FPS = input[idx + 1]; + activity_monitor->Mem_MinFreqStep = input[idx + 2]; + activity_monitor->Mem_MinActiveFreqType = input[idx + 3]; + activity_monitor->Mem_MinActiveFreq = input[idx + 4]; + activity_monitor->Mem_BoosterFreqType = input[idx + 5]; + activity_monitor->Mem_BoosterFreq = input[idx + 6]; + activity_monitor->Mem_PD_Data_limit_c = input[idx + 7]; + activity_monitor->Mem_PD_Data_error_coeff = input[idx + 8]; + activity_monitor->Mem_PD_Data_error_rate_coeff = input[idx + 9]; } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size != 10) - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } + return ret; +} - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor->Gfx_FPS = input[1]; - activity_monitor->Gfx_MinFreqStep = input[2]; - activity_monitor->Gfx_MinActiveFreqType = input[3]; - activity_monitor->Gfx_MinActiveFreq = input[4]; - activity_monitor->Gfx_BoosterFreqType = input[5]; - activity_monitor->Gfx_BoosterFreq = input[6]; - activity_monitor->Gfx_PD_Data_limit_c = input[7]; - activity_monitor->Gfx_PD_Data_error_coeff = input[8]; - activity_monitor->Gfx_PD_Data_error_rate_coeff = input[9]; - break; - case 1: /* Socclk */ - activity_monitor->Fclk_FPS = input[1]; - activity_monitor->Fclk_MinFreqStep = input[2]; - activity_monitor->Fclk_MinActiveFreqType = input[3]; - activity_monitor->Fclk_MinActiveFreq = input[4]; - activity_monitor->Fclk_BoosterFreqType = input[5]; - activity_monitor->Fclk_BoosterFreq = input[6]; - activity_monitor->Fclk_PD_Data_limit_c = input[7]; - activity_monitor->Fclk_PD_Data_error_coeff = input[8]; - activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; - break; - case 2: /* Memclk */ - activity_monitor->Mem_FPS = input[1]; - activity_monitor->Mem_MinFreqStep = input[2]; - activity_monitor->Mem_MinActiveFreqType = input[3]; - activity_monitor->Mem_MinActiveFreq = input[4]; - activity_monitor->Mem_BoosterFreqType = input[5]; - activity_monitor->Mem_BoosterFreq = input[6]; - activity_monitor->Mem_PD_Data_limit_c = input[7]; - activity_monitor->Mem_PD_Data_error_coeff = input[8]; - activity_monitor->Mem_PD_Data_error_rate_coeff = input[9]; - break; - default: - return -EINVAL; - } +static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int ret, idx = -1, i; - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), true); + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); + + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (!smu->custom_profile_params) { + smu->custom_profile_params = + kzalloc(SIENNA_CICHLID_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; + } + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != SIENNA_CICHLID_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT) + return -EINVAL; + idx = custom_params[0] * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = sienna_cichlid_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + if (idx != -1) + smu->custom_profile_params[idx] = 0; return ret; } + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, SIENNA_CICHLID_CUSTOM_PARAMS_SIZE); } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - smu->power_profile_mode); - if (workload_type < 0) - return -EINVAL; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); - if (ret) - dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + backend_workload_mask, NULL); + if (ret) { + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index cd3e9ba3eff44..a55ea76d73996 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1056,42 +1056,27 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, return size; } -static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +static int vangogh_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) { - int workload_type, ret; - uint32_t profile_mode = input[size]; + u32 backend_workload_mask = 0; + int ret; - if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); - return -EINVAL; - } - - if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || - profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) - return 0; - - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - profile_mode); - if (workload_type < 0) { - dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n", - profile_mode); - return -EINVAL; - } + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, - NULL); + backend_workload_mask, + NULL); if (ret) { - dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", - workload_type); + dev_err_once(smu->adev->dev, "Fail to set workload mask 0x%08x\n", + workload_mask); return ret; } - smu->power_profile_mode = profile_mode; - - return 0; + return ret; } static int vangogh_set_soft_freq_limited_range(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index a34797f3576bf..37d82a71a2d7c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -864,44 +864,27 @@ static int renoir_force_clk_levels(struct smu_context *smu, return ret; } -static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +static int renoir_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) { - int workload_type, ret; - uint32_t profile_mode = input[size]; + int ret; + u32 backend_workload_mask = 0; - if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); - return -EINVAL; - } - - if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || - profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) - return 0; - - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - profile_mode); - if (workload_type < 0) { - /* - * TODO: If some case need switch to powersave/default power mode - * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving. - */ - dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode); - return -EINVAL; - } + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, - NULL); + backend_workload_mask, + NULL); if (ret) { - dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); + dev_err_once(smu->adev->dev, "Failed to set workload mask 0x08%x\n", + workload_mask); return ret; } - smu->power_profile_mode = profile_mode; - - return 0; + return ret; } static int renoir_set_peak_clock_by_device(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 199bdd9720d36..3aa705aae4c01 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2571,82 +2571,76 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu, return size; } -static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, - long *input, - uint32_t size) +#define SMU_13_0_0_CUSTOM_PARAMS_COUNT 9 +#define SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT 2 +#define SMU_13_0_0_CUSTOM_PARAMS_SIZE (SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_0_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int smu_v13_0_0_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); - int workload_type, ret = 0; - u32 workload_mask, selected_workload_mask; - - smu->power_profile_mode = input[size]; + int ret, idx; - if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size != 9) - return -EINVAL; - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } - - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor->Gfx_FPS = input[1]; - activity_monitor->Gfx_MinActiveFreqType = input[2]; - activity_monitor->Gfx_MinActiveFreq = input[3]; - activity_monitor->Gfx_BoosterFreqType = input[4]; - activity_monitor->Gfx_BoosterFreq = input[5]; - activity_monitor->Gfx_PD_Data_limit_c = input[6]; - activity_monitor->Gfx_PD_Data_error_coeff = input[7]; - activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8]; - break; - case 1: /* Fclk */ - activity_monitor->Fclk_FPS = input[1]; - activity_monitor->Fclk_MinActiveFreqType = input[2]; - activity_monitor->Fclk_MinActiveFreq = input[3]; - activity_monitor->Fclk_BoosterFreqType = input[4]; - activity_monitor->Fclk_BoosterFreq = input[5]; - activity_monitor->Fclk_PD_Data_limit_c = input[6]; - activity_monitor->Fclk_PD_Data_error_coeff = input[7]; - activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8]; - break; - default: - return -EINVAL; - } + idx = 0 * SMU_13_0_0_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor->Gfx_FPS = input[idx + 1]; + activity_monitor->Gfx_MinActiveFreqType = input[idx + 2]; + activity_monitor->Gfx_MinActiveFreq = input[idx + 3]; + activity_monitor->Gfx_BoosterFreqType = input[idx + 4]; + activity_monitor->Gfx_BoosterFreq = input[idx + 5]; + activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6]; + activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7]; + activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8]; + } + idx = 1 * SMU_13_0_0_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Fclk */ + activity_monitor->Fclk_FPS = input[idx + 1]; + activity_monitor->Fclk_MinActiveFreqType = input[idx + 2]; + activity_monitor->Fclk_MinActiveFreq = input[idx + 3]; + activity_monitor->Fclk_BoosterFreqType = input[idx + 4]; + activity_monitor->Fclk_BoosterFreq = input[idx + 5]; + activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6]; + activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7]; + activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8]; + } - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - true); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); - return ret; - } + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - smu->power_profile_mode); + return ret; +} - if (workload_type < 0) - return -EINVAL; +static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int workload_type, ret, idx = -1, i; - selected_workload_mask = workload_mask = 1 << workload_type; + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2658,15 +2652,48 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); if (workload_type >= 0) - workload_mask |= 1 << workload_type; + backend_workload_mask |= 1 << workload_type; + } + + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (!smu->custom_profile_params) { + smu->custom_profile_params = + kzalloc(SMU_13_0_0_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; + } + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != SMU_13_0_0_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT) + return -EINVAL; + idx = custom_params[0] * SMU_13_0_0_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = smu_v13_0_0_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); + if (ret) { + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, SMU_13_0_0_CUSTOM_PARAMS_SIZE); } ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - workload_mask, - NULL); - if (!ret) - smu->workload_mask = selected_workload_mask; + SMU_MSG_SetWorkloadMask, + backend_workload_mask, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 34c1e0c7e1e49..f4ac403b8b360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2530,78 +2530,110 @@ do { \ return result; } -static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +#define SMU_13_0_7_CUSTOM_PARAMS_COUNT 8 +#define SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT 2 +#define SMU_13_0_7_CUSTOM_PARAMS_SIZE (SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_7_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int smu_v13_0_7_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); - int workload_type, ret = 0; + int ret, idx; - smu->power_profile_mode = input[size]; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; + idx = 0 * SMU_13_0_7_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor->Gfx_ActiveHystLimit = input[idx + 1]; + activity_monitor->Gfx_IdleHystLimit = input[idx + 2]; + activity_monitor->Gfx_FPS = input[idx + 3]; + activity_monitor->Gfx_MinActiveFreqType = input[idx + 4]; + activity_monitor->Gfx_BoosterFreqType = input[idx + 5]; + activity_monitor->Gfx_MinActiveFreq = input[idx + 6]; + activity_monitor->Gfx_BoosterFreq = input[idx + 7]; + } + idx = 1 * SMU_13_0_7_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Fclk */ + activity_monitor->Fclk_ActiveHystLimit = input[idx + 1]; + activity_monitor->Fclk_IdleHystLimit = input[idx + 2]; + activity_monitor->Fclk_FPS = input[idx + 3]; + activity_monitor->Fclk_MinActiveFreqType = input[idx + 4]; + activity_monitor->Fclk_BoosterFreqType = input[idx + 5]; + activity_monitor->Fclk_MinActiveFreq = input[idx + 6]; + activity_monitor->Fclk_BoosterFreq = input[idx + 7]; } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size != 8) - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } + return ret; +} - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor->Gfx_ActiveHystLimit = input[1]; - activity_monitor->Gfx_IdleHystLimit = input[2]; - activity_monitor->Gfx_FPS = input[3]; - activity_monitor->Gfx_MinActiveFreqType = input[4]; - activity_monitor->Gfx_BoosterFreqType = input[5]; - activity_monitor->Gfx_MinActiveFreq = input[6]; - activity_monitor->Gfx_BoosterFreq = input[7]; - break; - case 1: /* Fclk */ - activity_monitor->Fclk_ActiveHystLimit = input[1]; - activity_monitor->Fclk_IdleHystLimit = input[2]; - activity_monitor->Fclk_FPS = input[3]; - activity_monitor->Fclk_MinActiveFreqType = input[4]; - activity_monitor->Fclk_BoosterFreqType = input[5]; - activity_monitor->Fclk_MinActiveFreq = input[6]; - activity_monitor->Fclk_BoosterFreq = input[7]; - break; - default: - return -EINVAL; +static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int ret, idx = -1, i; + + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); + + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (!smu->custom_profile_params) { + smu->custom_profile_params = + kzalloc(SMU_13_0_7_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; } - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), true); + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != SMU_13_0_7_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT) + return -EINVAL; + idx = custom_params[0] * SMU_13_0_7_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = smu_v13_0_7_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + if (idx != -1) + smu->custom_profile_params[idx] = 0; return ret; } + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, SMU_13_0_7_CUSTOM_PARAMS_SIZE); } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - smu->power_profile_mode); - if (workload_type < 0) - return -EINVAL; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + backend_workload_mask, NULL); - if (ret) - dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); - else - smu->workload_mask = (1 << workload_type); + if (ret) { + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98e01a06add82..6a565ce74d5b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1739,90 +1739,120 @@ static int smu_v14_0_2_get_power_profile_mode(struct smu_context *smu, return size; } -static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, - long *input, - uint32_t size) +#define SMU_14_0_2_CUSTOM_PARAMS_COUNT 9 +#define SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT 2 +#define SMU_14_0_2_CUSTOM_PARAMS_SIZE (SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT * SMU_14_0_2_CUSTOM_PARAMS_COUNT * sizeof(long)) + +static int smu_v14_0_2_set_power_profile_mode_coeff(struct smu_context *smu, + long *input) { DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); - int workload_type, ret = 0; - uint32_t current_profile_mode = smu->power_profile_mode; - smu->power_profile_mode = input[size]; + int ret, idx; - if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size != 9) - return -EINVAL; + idx = 0 * SMU_14_0_2_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Gfxclk */ + activity_monitor->Gfx_FPS = input[idx + 1]; + activity_monitor->Gfx_MinActiveFreqType = input[idx + 2]; + activity_monitor->Gfx_MinActiveFreq = input[idx + 3]; + activity_monitor->Gfx_BoosterFreqType = input[idx + 4]; + activity_monitor->Gfx_BoosterFreq = input[idx + 5]; + activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6]; + activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7]; + activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8]; + } + idx = 1 * SMU_14_0_2_CUSTOM_PARAMS_COUNT; + if (input[idx]) { + /* Fclk */ + activity_monitor->Fclk_FPS = input[idx + 1]; + activity_monitor->Fclk_MinActiveFreqType = input[idx + 2]; + activity_monitor->Fclk_MinActiveFreq = input[idx + 3]; + activity_monitor->Fclk_BoosterFreqType = input[idx + 4]; + activity_monitor->Fclk_BoosterFreq = input[idx + 5]; + activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6]; + activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7]; + activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8]; + } - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } - switch (input[0]) { - case 0: /* Gfxclk */ - activity_monitor->Gfx_FPS = input[1]; - activity_monitor->Gfx_MinActiveFreqType = input[2]; - activity_monitor->Gfx_MinActiveFreq = input[3]; - activity_monitor->Gfx_BoosterFreqType = input[4]; - activity_monitor->Gfx_BoosterFreq = input[5]; - activity_monitor->Gfx_PD_Data_limit_c = input[6]; - activity_monitor->Gfx_PD_Data_error_coeff = input[7]; - activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8]; - break; - case 1: /* Fclk */ - activity_monitor->Fclk_FPS = input[1]; - activity_monitor->Fclk_MinActiveFreqType = input[2]; - activity_monitor->Fclk_MinActiveFreq = input[3]; - activity_monitor->Fclk_BoosterFreqType = input[4]; - activity_monitor->Fclk_BoosterFreq = input[5]; - activity_monitor->Fclk_PD_Data_limit_c = input[6]; - activity_monitor->Fclk_PD_Data_error_coeff = input[7]; - activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8]; - break; - default: - return -EINVAL; - } + return ret; +} - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - true); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); - return ret; - } - } +static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, + u32 workload_mask, + long *custom_params, + u32 custom_params_max_idx) +{ + u32 backend_workload_mask = 0; + int ret, idx = -1, i; + + smu_cmn_get_backend_workload_mask(smu, workload_mask, + &backend_workload_mask); - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + /* disable deep sleep if compute is enabled */ + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) smu_v14_0_deep_sleep_control(smu, false); - else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + else smu_v14_0_deep_sleep_control(smu, true); - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - smu->power_profile_mode); - if (workload_type < 0) - return -EINVAL; + if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) { + if (!smu->custom_profile_params) { + smu->custom_profile_params = + kzalloc(SMU_14_0_2_CUSTOM_PARAMS_SIZE, GFP_KERNEL); + if (!smu->custom_profile_params) + return -ENOMEM; + } + if (custom_params && custom_params_max_idx) { + if (custom_params_max_idx != SMU_14_0_2_CUSTOM_PARAMS_COUNT) + return -EINVAL; + if (custom_params[0] >= SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT) + return -EINVAL; + idx = custom_params[0] * SMU_14_0_2_CUSTOM_PARAMS_COUNT; + smu->custom_profile_params[idx] = 1; + for (i = 1; i < custom_params_max_idx; i++) + smu->custom_profile_params[idx + i] = custom_params[i]; + } + ret = smu_v14_0_2_set_power_profile_mode_coeff(smu, + smu->custom_profile_params); + if (ret) { + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } + } else if (smu->custom_profile_params) { + memset(smu->custom_profile_params, 0, SMU_14_0_2_CUSTOM_PARAMS_SIZE); + } - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - 1 << workload_type, - NULL); - if (!ret) - smu->workload_mask = 1 << workload_type; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + backend_workload_mask, NULL); + if (ret) { + dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n", + workload_mask); + if (idx != -1) + smu->custom_profile_params[idx] = 0; + return ret; + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 0bc6fb7d7223a..9f55207ea9bc3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1221,3 +1221,28 @@ void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy) { policy->desc = &xgmi_plpd_policy_desc; } + +void smu_cmn_get_backend_workload_mask(struct smu_context *smu, + u32 workload_mask, + u32 *backend_workload_mask) +{ + int workload_type; + u32 profile_mode; + + *backend_workload_mask = 0; + + for (profile_mode = 0; profile_mode < PP_SMC_POWER_PROFILE_COUNT; profile_mode++) { + if (!(workload_mask & (1 << profile_mode))) + continue; + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + profile_mode); + + if (workload_type < 0) + continue; + + *backend_workload_mask |= 1 << workload_type; + } +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1de685defe85b..a020277dec3e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -147,5 +147,9 @@ bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev); void smu_cmn_generic_soc_policy_desc(struct smu_dpm_policy *policy); void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy); +void smu_cmn_get_backend_workload_mask(struct smu_context *smu, + u32 workload_mask, + u32 *backend_workload_mask); + #endif #endif From c889aa2e7c2f0040484182d6cbbc0837b193a93f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 30 Nov 2024 13:41:00 -0800 Subject: [PATCH 187/366] MAINTAINERS: list PTP drivers under networking PTP patches go via the netdev trees, add drivers/ptp/ to the networking entry so that get_maintainer.pl --scm lists those trees above Linus's tree. Thanks to the real entry using drivers/ptp/* the original entry will still be considered more specific / higher prio. Acked-by: Richard Cochran Link: https://patch.msgid.link/20241130214100.125325-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0456a33ef6579..f9f2f009aecfc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16194,6 +16194,7 @@ F: Documentation/devicetree/bindings/net/ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ +F: drivers/ptp/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h From 12659d28615d606b36e382f4de2dd05550d202af Mon Sep 17 00:00:00 2001 From: Tao Lyu Date: Mon, 2 Dec 2024 16:02:37 -0800 Subject: [PATCH 188/366] bpf: Ensure reg is PTR_TO_STACK in process_iter_arg Currently, KF_ARG_PTR_TO_ITER handling missed checking the reg->type and ensuring it is PTR_TO_STACK. Instead of enforcing this in the caller of process_iter_arg, move the check into it instead so that all callers will gain the check by default. This is similar to process_dynptr_func. An existing selftest in verifier_bits_iter.c fails due to this change, but it's because it was passing a NULL pointer into iter_next helper and getting an error further down the checks, but probably meant to pass an uninitialized iterator on the stack (as is done in the subsequent test below it). We will gain coverage for non-PTR_TO_STACK arguments in later patches hence just change the declaration to zero-ed stack object. Fixes: 06accc8779c1 ("bpf: add support for open-coded iterator loops") Suggested-by: Andrii Nakryiko Signed-off-by: Tao Lyu [ Kartikeya: move check into process_iter_arg, rewrite commit log ] Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241203000238.3602922-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 +++++ tools/testing/selftests/bpf/progs/verifier_bits_iter.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1c4ebb326785d..358a3566bb60a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8189,6 +8189,11 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id const struct btf_type *t; int spi, err, i, nr_slots, btf_id; + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1); + return -EINVAL; + } + /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() * ensures struct convention, so we wouldn't need to do any BTF * validation here. But given iter state can be passed as a parameter diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index 7c881bca9af5c..a7a6ae6c162fe 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -35,9 +35,9 @@ __description("uninitialized iter in ->next()") __failure __msg("expected an initialized iter_bits as arg #1") int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { - struct bpf_iter_bits *it = NULL; + struct bpf_iter_bits it = {}; - bpf_iter_bits_next(it); + bpf_iter_bits_next(&it); return 0; } From 7f71197001e35f46aca97204986edf9301f8dd09 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 2 Dec 2024 16:02:38 -0800 Subject: [PATCH 189/366] selftests/bpf: Add tests for iter arg check Add selftests to cover argument type check for iterator kfuncs, and cover all three kinds (new, next, destroy). Without the fix in the previous patch, the selftest would not cause a verifier error. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241203000238.3602922-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters.c | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index ef70b88bccb25..7c969c1275737 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1486,4 +1486,30 @@ int iter_subprog_check_stacksafe(const void *ctx) return 0; } +struct bpf_iter_num global_it; + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_new_bad_arg(const void *ctx) +{ + bpf_iter_num_new(&global_it, 0, 1); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_next_bad_arg(const void *ctx) +{ + bpf_iter_num_next(&global_it); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_destroy_bad_arg(const void *ctx) +{ + bpf_iter_num_destroy(&global_it); + return 0; +} + char _license[] SEC("license") = "GPL"; From bd74e238ae6944b462f57ce8752440a011ba4530 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 2 Dec 2024 16:22:35 -0800 Subject: [PATCH 190/366] bpf: Zero index arg error string for dynptr and iter Andrii spotted that process_dynptr_func's rejection of incorrect argument register type will print an error string where argument numbers are not zero-indexed, unlike elsewhere in the verifier. Fix this by subtracting 1 from regno. The same scenario exists for iterator messages. Fix selftest error strings that match on the exact argument number while we're at it to ensure clean bisection. Suggested-by: Andrii Nakryiko Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241203002235.3776418-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 +++++----- .../testing/selftests/bpf/progs/dynptr_fail.c | 22 +++++++++---------- .../selftests/bpf/progs/iters_state_safety.c | 14 ++++++------ .../selftests/bpf/progs/iters_testmod_seq.c | 4 ++-- .../bpf/progs/test_kfunc_dynptr_param.c | 2 +- .../selftests/bpf/progs/verifier_bits_iter.c | 4 ++-- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 358a3566bb60a..2fd35465d650a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8071,7 +8071,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { verbose(env, "arg#%d expected pointer to stack or const struct bpf_dynptr\n", - regno); + regno - 1); return -EINVAL; } @@ -8125,7 +8125,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn if (!is_dynptr_reg_valid_init(env, reg)) { verbose(env, "Expected an initialized dynptr as arg #%d\n", - regno); + regno - 1); return -EINVAL; } @@ -8133,7 +8133,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) { verbose(env, "Expected a dynptr of type %s as arg #%d\n", - dynptr_type_str(arg_to_dynptr_type(arg_type)), regno); + dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1); return -EINVAL; } @@ -8202,7 +8202,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id */ btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); if (btf_id < 0) { - verbose(env, "expected valid iter pointer as arg #%d\n", regno); + verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1); return -EINVAL; } t = btf_type_by_id(meta->btf, btf_id); @@ -8212,7 +8212,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id /* bpf_iter__new() expects pointer to uninit iter state */ if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { verbose(env, "expected uninitialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno); + iter_type_str(meta->btf, btf_id), regno - 1); return -EINVAL; } @@ -8236,7 +8236,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id break; case -EINVAL: verbose(env, "expected an initialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno); + iter_type_str(meta->btf, btf_id), regno - 1); return err; case -EPROTO: verbose(env, "expected an RCU CS when using %s\n", meta->func_name); diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 8f36c9de75915..dfd817d0348c4 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -428,7 +428,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -1407,7 +1407,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) /* bpf_dynptr_adjust can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_adjust_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1420,7 +1420,7 @@ int dynptr_adjust_invalid(void *ctx) /* bpf_dynptr_is_null can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_null_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1433,7 +1433,7 @@ int dynptr_is_null_invalid(void *ctx) /* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_rdonly_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1446,7 +1446,7 @@ int dynptr_is_rdonly_invalid(void *ctx) /* bpf_dynptr_size can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_size_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1459,7 +1459,7 @@ int dynptr_size_invalid(void *ctx) /* Only initialized dynptrs can be cloned */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int clone_invalid1(void *ctx) { struct bpf_dynptr ptr1 = {}; @@ -1493,7 +1493,7 @@ int clone_invalid2(struct xdp_md *xdp) /* Invalidating a dynptr should invalidate its clones */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate1(void *ctx) { struct bpf_dynptr clone; @@ -1514,7 +1514,7 @@ int clone_invalidate1(void *ctx) /* Invalidating a dynptr should invalidate its parent */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate2(void *ctx) { struct bpf_dynptr ptr; @@ -1535,7 +1535,7 @@ int clone_invalidate2(void *ctx) /* Invalidating a dynptr should invalidate its siblings */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate3(void *ctx) { struct bpf_dynptr ptr; @@ -1723,7 +1723,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) } SEC("?raw_tp") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int test_dynptr_reg_type(void *ctx) { struct task_struct *current = NULL; diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index d47e59aba6de3..f41257eadbb25 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int destroy_without_creating_fail(void *ctx) { /* init with zeros to stop verifier complaining about uninit stack */ @@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_direct_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_helper_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx) } SEC("?raw_tp") -__failure __msg("expected uninitialized iter_num as arg #1") +__failure __msg("expected uninitialized iter_num as arg #0") int double_create_fail(void *ctx) { struct bpf_iter_num iter; @@ -258,7 +258,7 @@ int double_create_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int double_destroy_fail(void *ctx) { struct bpf_iter_num iter; @@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_without_new_fail(void *ctx) { struct bpf_iter_num iter; @@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_after_destroy_fail(void *ctx) { struct bpf_iter_num iter; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 4a176e6aede89..6543d5b6e0a97 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_before_bad(const void *ctx) { struct bpf_iter_testmod_seq it; @@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_after_bad(const void *ctx) { struct bpf_iter_testmod_seq it; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index e68667aec6a65..cd4d752bd089c 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val = 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index a7a6ae6c162fe..8bcddadfc4dae 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -32,7 +32,7 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->next()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; @@ -43,7 +43,7 @@ int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->destroy()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; From ccb989e4d1efe0dd81b28c437443532d80d9ecee Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 25 Nov 2024 09:40:50 +0100 Subject: [PATCH 191/366] net: phy: microchip: Reset LAN88xx PHY to ensure clean link state on LAN7800/7850 Fix outdated MII_LPA data in the LAN88xx PHY, which is used in LAN7800 and LAN7850 USB Ethernet controllers. Due to a hardware limitation, the PHY cannot reliably update link status after parallel detection when the link partner does not support auto-negotiation. To mitigate this, add a PHY reset in `lan88xx_link_change_notify()` when `phydev->state` is `PHY_NOLINK`, ensuring the PHY starts in a clean state and reports accurate fixed link parallel detection results. Fixes: 792aec47d59d9 ("add microchip LAN88xx phy driver") Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241125084050.414352-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index d3273bc0da4a1..691969a4910f2 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -351,6 +351,22 @@ static int lan88xx_config_aneg(struct phy_device *phydev) static void lan88xx_link_change_notify(struct phy_device *phydev) { int temp; + int ret; + + /* Reset PHY to ensure MII_LPA provides up-to-date information. This + * issue is reproducible only after parallel detection, as described + * in IEEE 802.3-2022, Section 28.2.3.1 ("Parallel detection function"), + * where the link partner does not support auto-negotiation. + */ + if (phydev->state == PHY_NOLINK) { + ret = phy_init_hw(phydev); + if (ret < 0) + goto link_change_notify_failed; + + ret = _phy_start_aneg(phydev); + if (ret < 0) + goto link_change_notify_failed; + } /* At forced 100 F/H mode, chip may fail to set mode correctly * when cable is switched between long(~50+m) and short one. @@ -377,6 +393,11 @@ static void lan88xx_link_change_notify(struct phy_device *phydev) temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; phy_write(phydev, LAN88XX_INT_MASK, temp); } + + return; + +link_change_notify_failed: + phydev_err(phydev, "Link change process failed %pe\n", ERR_PTR(ret)); } /** From 3301ab7d5aeb0fe270f73a3d4810c9d1b6a9f045 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Thu, 28 Nov 2024 09:59:50 +0100 Subject: [PATCH 192/366] net/ipv6: release expired exception dst cached in socket Dst objects get leaked in ip6_negative_advice() when this function is executed for an expired IPv6 route located in the exception table. There are several conditions that must be fulfilled for the leak to occur: * an ICMPv6 packet indicating a change of the MTU for the path is received, resulting in an exception dst being created * a TCP connection that uses the exception dst for routing packets must start timing out so that TCP begins retransmissions * after the exception dst expires, the FIB6 garbage collector must not run before TCP executes ip6_negative_advice() for the expired exception dst When TCP executes ip6_negative_advice() for an exception dst that has expired and if no other socket holds a reference to the exception dst, the refcount of the exception dst is 2, which corresponds to the increment made by dst_init() and the increment made by the TCP socket for which the connection is timing out. The refcount made by the socket is never released. The refcount of the dst is decremented in sk_dst_reset() but that decrement is counteracted by a dst_hold() intentionally placed just before the sk_dst_reset() in ip6_negative_advice(). After ip6_negative_advice() has finished, there is no other object tied to the dst. The socket lost its reference stored in sk_dst_cache and the dst is no longer in the exception table. The exception dst becomes a leaked object. As a result of this dst leak, an unbalanced refcount is reported for the loopback device of a net namespace being destroyed under kernels that do not contain e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"): unregister_netdevice: waiting for lo to become free. Usage count = 2 Fix the dst leak by removing the dst_hold() in ip6_negative_advice(). The patch that introduced the dst_hold() in ip6_negative_advice() was 92f1655aa2b22 ("net: fix __dst_negative_advice() race"). But 92f1655aa2b22 merely refactored the code with regards to the dst refcount so the issue was present even before 92f1655aa2b22. The bug was introduced in 54c1a859efd9f ("ipv6: Don't drop cache route entry unless timer actually expired.") where the expired cached route is deleted and the sk_dst_cache member of the socket is set to NULL by calling dst_negative_advice() but the refcount belonging to the socket is left unbalanced. The IPv4 version - ipv4_negative_advice() - is not affected by this bug. When the TCP connection times out ipv4_negative_advice() merely resets the sk_dst_cache of the socket while decrementing the refcount of the exception dst. Fixes: 92f1655aa2b22 ("net: fix __dst_negative_advice() race") Fixes: 54c1a859efd9f ("ipv6: Don't drop cache route entry unless timer actually expired.") Link: https://lore.kernel.org/netdev/20241113105611.GA6723@incl/T/#u Signed-off-by: Jiri Wiesner Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241128085950.GA4505@incl Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 63d7681c929fc..67ff16c047180 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2780,10 +2780,10 @@ static void ip6_negative_advice(struct sock *sk, if (rt->rt6i_flags & RTF_CACHE) { rcu_read_lock(); if (rt6_check_expired(rt)) { - /* counteract the dst_release() in sk_dst_reset() */ - dst_hold(dst); + /* rt/dst can not be destroyed yet, + * because of rcu_read_lock() + */ sk_dst_reset(sk); - rt6_remove_exception_rt(rt); } rcu_read_unlock(); From 22be4727a8f898442066bcac34f8a1ad0bc72e14 Mon Sep 17 00:00:00 2001 From: Ivan Solodovnikov Date: Tue, 26 Nov 2024 17:39:02 +0300 Subject: [PATCH 193/366] dccp: Fix memory leak in dccp_feat_change_recv If dccp_feat_push_confirm() fails after new value for SP feature was accepted without reconciliation ('entry == NULL' branch), memory allocated for that value with dccp_feat_clone_sp_val() is never freed. Here is the kmemleak stack for this: unreferenced object 0xffff88801d4ab488 (size 8): comm "syz-executor310", pid 1127, jiffies 4295085598 (age 41.666s) hex dump (first 8 bytes): 01 b4 4a 1d 80 88 ff ff ..J..... backtrace: [<00000000db7cabfe>] kmemdup+0x23/0x50 mm/util.c:128 [<0000000019b38405>] kmemdup include/linux/string.h:465 [inline] [<0000000019b38405>] dccp_feat_clone_sp_val net/dccp/feat.c:371 [inline] [<0000000019b38405>] dccp_feat_clone_sp_val net/dccp/feat.c:367 [inline] [<0000000019b38405>] dccp_feat_change_recv net/dccp/feat.c:1145 [inline] [<0000000019b38405>] dccp_feat_parse_options+0x1196/0x2180 net/dccp/feat.c:1416 [<00000000b1f6d94a>] dccp_parse_options+0xa2a/0x1260 net/dccp/options.c:125 [<0000000030d7b621>] dccp_rcv_state_process+0x197/0x13d0 net/dccp/input.c:650 [<000000001f74c72e>] dccp_v4_do_rcv+0xf9/0x1a0 net/dccp/ipv4.c:688 [<00000000a6c24128>] sk_backlog_rcv include/net/sock.h:1041 [inline] [<00000000a6c24128>] __release_sock+0x139/0x3b0 net/core/sock.c:2570 [<00000000cf1f3a53>] release_sock+0x54/0x1b0 net/core/sock.c:3111 [<000000008422fa23>] inet_wait_for_connect net/ipv4/af_inet.c:603 [inline] [<000000008422fa23>] __inet_stream_connect+0x5d0/0xf70 net/ipv4/af_inet.c:696 [<0000000015b6f64d>] inet_stream_connect+0x53/0xa0 net/ipv4/af_inet.c:735 [<0000000010122488>] __sys_connect_file+0x15c/0x1a0 net/socket.c:1865 [<00000000b4b70023>] __sys_connect+0x165/0x1a0 net/socket.c:1882 [<00000000f4cb3815>] __do_sys_connect net/socket.c:1892 [inline] [<00000000f4cb3815>] __se_sys_connect net/socket.c:1889 [inline] [<00000000f4cb3815>] __x64_sys_connect+0x6e/0xb0 net/socket.c:1889 [<00000000e7b1e839>] do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 [<0000000055e91434>] entry_SYSCALL_64_after_hwframe+0x67/0xd1 Clean up the allocated memory in case of dccp_feat_push_confirm() failure and bail out with an error reset code. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: e77b8363b2ea ("dccp: Process incoming Change feature-negotiation options") Signed-off-by: Ivan Solodovnikov Link: https://patch.msgid.link/20241126143902.190853-1-solodovnikov.ia@phystech.edu Signed-off-by: Paolo Abeni --- net/dccp/feat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 54086bb05c42c..f7554dcdaaba9 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1166,8 +1166,12 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, goto not_valid_or_not_known; } - return dccp_feat_push_confirm(fn, feat, local, &fval); + if (dccp_feat_push_confirm(fn, feat, local, &fval)) { + kfree(fval.sp.vec); + return DCCP_RESET_CODE_TOO_BUSY; + } + return 0; } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */ return 0; } From 6a2fa13312e51a621f652d522d7e2df7066330b6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 27 Nov 2024 14:05:12 +0900 Subject: [PATCH 194/366] tipc: Fix use-after-free of kernel socket in cleanup_bearer(). syzkaller reported a use-after-free of UDP kernel socket in cleanup_bearer() without repro. [0][1] When bearer_disable() calls tipc_udp_disable(), cleanup of the UDP kernel socket is deferred by work calling cleanup_bearer(). tipc_net_stop() waits for such works to finish by checking tipc_net(net)->wq_count. However, the work decrements the count too early before releasing the kernel socket, unblocking cleanup_net() and resulting in use-after-free. Let's move the decrement after releasing the socket in cleanup_bearer(). [0]: ref_tracker: net notrefcnt@000000009b3d1faf has 1/1 users at sk_alloc+0x438/0x608 inet_create+0x4c8/0xcb0 __sock_create+0x350/0x6b8 sock_create_kern+0x58/0x78 udp_sock_create4+0x68/0x398 udp_sock_create+0x88/0xc8 tipc_udp_enable+0x5e8/0x848 __tipc_nl_bearer_enable+0x84c/0xed8 tipc_nl_bearer_enable+0x38/0x60 genl_family_rcv_msg_doit+0x170/0x248 genl_rcv_msg+0x400/0x5b0 netlink_rcv_skb+0x1dc/0x398 genl_rcv+0x44/0x68 netlink_unicast+0x678/0x8b0 netlink_sendmsg+0x5e4/0x898 ____sys_sendmsg+0x500/0x830 [1]: BUG: KMSAN: use-after-free in udp_hashslot include/net/udp.h:85 [inline] BUG: KMSAN: use-after-free in udp_lib_unhash+0x3b8/0x930 net/ipv4/udp.c:1979 udp_hashslot include/net/udp.h:85 [inline] udp_lib_unhash+0x3b8/0x930 net/ipv4/udp.c:1979 sk_common_release+0xaf/0x3f0 net/core/sock.c:3820 inet_release+0x1e0/0x260 net/ipv4/af_inet.c:437 inet6_release+0x6f/0xd0 net/ipv6/af_inet6.c:489 __sock_release net/socket.c:658 [inline] sock_release+0xa0/0x210 net/socket.c:686 cleanup_bearer+0x42d/0x4c0 net/tipc/udp_media.c:819 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xcaf/0x1c90 kernel/workqueue.c:3310 worker_thread+0xf6c/0x1510 kernel/workqueue.c:3391 kthread+0x531/0x6b0 kernel/kthread.c:389 ret_from_fork+0x60/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:244 Uninit was created at: slab_free_hook mm/slub.c:2269 [inline] slab_free mm/slub.c:4580 [inline] kmem_cache_free+0x207/0xc40 mm/slub.c:4682 net_free net/core/net_namespace.c:454 [inline] cleanup_net+0x16f2/0x19d0 net/core/net_namespace.c:647 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xcaf/0x1c90 kernel/workqueue.c:3310 worker_thread+0xf6c/0x1510 kernel/workqueue.c:3391 kthread+0x531/0x6b0 kernel/kthread.c:389 ret_from_fork+0x60/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:244 CPU: 0 UID: 0 PID: 54 Comm: kworker/0:2 Not tainted 6.12.0-rc1-00131-gf66ebf37d69c #7 91723d6f74857f70725e1583cba3cf4adc716cfa Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Workqueue: events cleanup_bearer Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241127050512.28438-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/tipc/udp_media.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 439f755399772..b7e25e7e9933b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -814,10 +814,10 @@ static void cleanup_bearer(struct work_struct *work) kfree_rcu(rcast, rcu); } - atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); dst_cache_destroy(&ub->rcast.dst_cache); udp_tunnel_sock_release(ub->ubsock); synchronize_net(); + atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); kfree(ub); } From 4495816122cc39c428ebbc4ffd30110bb2877df9 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 26 Nov 2024 12:10:52 -0800 Subject: [PATCH 195/366] drm/xe/guc: Fix missing init value and add register order check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missing initial value for last_value. For GuC capture register definition, it is required to define 64bit register in a pair of 2 consecutive 32bit register entries, low first, then hi. Add code to check this order. Changes from prior revs: v5:- Correct cross-line comment format v4:- Fix warn on condition and remove skipping v3:- Move break inside brace v2:- Correct the fix tag pointed commit Add examples in comments for warning Add 1 missing hi condition check Fixes: ecb633646391 ("drm/xe/guc: Plumb GuC-capture into dev coredump") Signed-off-by: Zhanjun Dong Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20241126201052.1937079-1-zhanjun.dong@intel.com (cherry picked from commit 6f59fbcfa041e7d69e5e5f39d4c8cffa06fdc50b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_capture.c | 77 +++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index cc72446a5de11..d63912d282467 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -102,6 +102,7 @@ struct __guc_capture_parsed_output { * A 64 bit register define requires 2 consecutive entries, * with low dword first and hi dword the second. * 2. Register name: null for incompleted define + * 3. Incorrect order will trigger XE_WARN. */ #define COMMON_XELP_BASE_GLOBAL \ { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"} @@ -1675,10 +1676,10 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ struct xe_devcoredump *devcoredump = &xe->devcoredump; struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot; struct gcap_reg_list_info *reginfo = NULL; - u32 last_value, i; - bool is_ext; + u32 i, last_value = 0; + bool is_ext, low32_ready = false; - if (!list || list->num_regs == 0) + if (!list || !list->list || list->num_regs == 0) return; XE_WARN_ON(!devcore_snapshot->matched_node); @@ -1701,29 +1702,75 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ continue; value = reg->value; - if (reg_desc->data_type == REG_64BIT_LOW_DW) { + switch (reg_desc->data_type) { + case REG_64BIT_LOW_DW: last_value = value; + + /* + * A 64 bit register define requires 2 consecutive + * entries in register list, with low dword first + * and hi dword the second, like: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, "XXX_REG"}, + * + * Incorrect order will trigger XE_WARN. + * + * Possible double low here, for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + */ + XE_WARN_ON(low32_ready); + low32_ready = true; /* Low 32 bit dword saved, continue for high 32 bit */ - continue; - } else if (reg_desc->data_type == REG_64BIT_HI_DW) { + break; + + case REG_64BIT_HI_DW: { u64 value_qw = ((u64)value << 32) | last_value; + /* + * Incorrect 64bit register order. Possible missing low. + * for example: + * { XXX_REG(0), REG_32BIT, 0, 0, NULL}, + * { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL}, + */ + XE_WARN_ON(!low32_ready); + low32_ready = false; + drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw); - continue; + break; } - if (is_ext) { - int dss, group, instance; + case REG_32BIT: + /* + * Incorrect 64bit register order. Possible missing high. + * for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"}, + */ + XE_WARN_ON(low32_ready); + + if (is_ext) { + int dss, group, instance; - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); + group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); + instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); + dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); - } else { - drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); + } else { + drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); + } + break; } } + + /* + * Incorrect 64bit register order. Possible missing high. + * for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * } // <- Register list end + */ + XE_WARN_ON(low32_ready); } /** From 5dce85fecb87751ec94526e1ac516dd7871e2e0c Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 28 Nov 2024 13:08:23 -0800 Subject: [PATCH 196/366] drm/xe: Move the coredump registration to the worker thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding lockdep checking to the coredump code showed that there was an existing violation. The dev_coredumpm_timeout() call is used to register the dump with the base coredump subsystem. However, that makes multiple memory allocations, only some of which use the GFP_ flags passed in. So that also needs to be deferred to the worker function where it is safe to allocate with arbitrary flags. In order to not add protoypes for the callback functions, moving the _timeout call also means moving the worker thread function to later in the file. v2: Rebased after other changes to the worker function. Fixes: e799485044cb ("drm/xe: Introduce the dev_coredump infrastructure.") Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Cc: Daniel Vetter Cc: Francois Dugast Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Sumit Semwal Cc: "Christian König" Cc: intel-xe@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: # v6.8+ Signed-off-by: John Harrison Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241128210824.3302147-3-John.C.Harrison@Intel.com (cherry picked from commit 90f51a7f4ec1004fc4ddfbc6d1f1068d85ef4771) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_devcoredump.c | 73 +++++++++++++++-------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 0b0cd6aa1d9fb..f8947e7e917ec 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) -{ - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); - struct xe_device *xe = coredump_to_xe(coredump); - unsigned int fw_ref; - - xe_pm_runtime_get(xe); - - /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - - xe_pm_runtime_put(xe); - - /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; - - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); -} - static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data) "Xe device coredump has been deleted.\n"); } +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + struct xe_device *xe = coredump_to_xe(coredump); + unsigned int fw_ref; + + /* + * NB: Despite passing a GFP_ flags parameter here, more allocations are done + * internally using GFP_KERNEL expliictly. Hence this call must be in the worker + * thread and not in the initial capture call. + */ + dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, + xe_devcoredump_read, xe_devcoredump_free, + XE_COREDUMP_TIMEOUT_JIFFIES); + + xe_pm_runtime_get(xe); + + /* keep going if fw fails as we still want to save the memory and SW data */ + fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); + + xe_pm_runtime_put(xe); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + static void devcoredump_snapshot(struct xe_devcoredump *coredump, struct xe_sched_job *job) { @@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job) drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); - - dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, - xe_devcoredump_read, xe_devcoredump_free, - XE_COREDUMP_TIMEOUT_JIFFIES); } static void xe_driver_devcoredump_fini(void *arg) From 0541db8ee32c09463a72d0987382b3a3336b0043 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 27 Nov 2024 21:30:13 +0800 Subject: [PATCH 197/366] net/smc: initialize close_work early to avoid warning We encountered a warning that close_work was canceled before initialization. WARNING: CPU: 7 PID: 111103 at kernel/workqueue.c:3047 __flush_work+0x19e/0x1b0 Workqueue: events smc_lgr_terminate_work [smc] RIP: 0010:__flush_work+0x19e/0x1b0 Call Trace: ? __wake_up_common+0x7a/0x190 ? work_busy+0x80/0x80 __cancel_work_timer+0xe3/0x160 smc_close_cancel_work+0x1a/0x70 [smc] smc_close_active_abort+0x207/0x360 [smc] __smc_lgr_terminate.part.38+0xc8/0x180 [smc] process_one_work+0x19e/0x340 worker_thread+0x30/0x370 ? process_one_work+0x340/0x340 kthread+0x117/0x130 ? __kthread_cancel_work+0x50/0x50 ret_from_fork+0x22/0x30 This is because when smc_close_cancel_work is triggered, e.g. the RDMA driver is rmmod and the LGR is terminated, the conn->close_work is flushed before initialization, resulting in WARN_ON(!work->func). __smc_lgr_terminate | smc_connect_{rdma|ism} ------------------------------------------------------------- | smc_conn_create | \- smc_lgr_register_conn for conn in lgr->conns_all | \- smc_conn_kill | \- smc_close_active_abort | \- smc_close_cancel_work | \- cancel_work_sync | \- __flush_work | (close_work) | | smc_close_init | \- INIT_WORK(&close_work) So fix this by initializing close_work before establishing the connection. Fixes: 46c28dbd4c23 ("net/smc: no socket state changes in tasklet context") Fixes: 413498440e30 ("net/smc: add SMC-D support in af_smc") Signed-off-by: Wen Gu Reviewed-by: Wenjia Zhang Reviewed-by: Alexandra Winter Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9d76e902fd770..ed6d4d520bc7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -383,6 +383,7 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) smc->limit_smc_hs = net->smc.limit_smc_hs; smc->use_fallback = false; /* assume rdma capability first */ smc->fallback_rsn = 0; + smc_close_init(smc); } static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, @@ -1299,7 +1300,6 @@ static int smc_connect_rdma(struct smc_sock *smc, goto connect_abort; } - smc_close_init(smc); smc_rx_init(smc); if (ini->first_contact_local) { @@ -1435,7 +1435,6 @@ static int smc_connect_ism(struct smc_sock *smc, goto connect_abort; } } - smc_close_init(smc); smc_rx_init(smc); smc_tx_init(smc); @@ -2479,7 +2478,6 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; mutex_lock(&smc_server_lgr_pending); - smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); From 2c7f14ed9c19ec0f149479d1c2842ec1f9bf76d7 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 27 Nov 2024 21:30:14 +0800 Subject: [PATCH 198/366] net/smc: fix LGR and link use-after-free issue We encountered a LGR/link use-after-free issue, which manifested as the LGR/link refcnt reaching 0 early and entering the clear process, making resource access unsafe. refcount_t: addition on 0; use-after-free. WARNING: CPU: 14 PID: 107447 at lib/refcount.c:25 refcount_warn_saturate+0x9c/0x140 Workqueue: events smc_lgr_terminate_work [smc] Call trace: refcount_warn_saturate+0x9c/0x140 __smc_lgr_terminate.part.45+0x2a8/0x370 [smc] smc_lgr_terminate_work+0x28/0x30 [smc] process_one_work+0x1b8/0x420 worker_thread+0x158/0x510 kthread+0x114/0x118 or refcount_t: underflow; use-after-free. WARNING: CPU: 6 PID: 93140 at lib/refcount.c:28 refcount_warn_saturate+0xf0/0x140 Workqueue: smc_hs_wq smc_listen_work [smc] Call trace: refcount_warn_saturate+0xf0/0x140 smcr_link_put+0x1cc/0x1d8 [smc] smc_conn_free+0x110/0x1b0 [smc] smc_conn_abort+0x50/0x60 [smc] smc_listen_find_device+0x75c/0x790 [smc] smc_listen_work+0x368/0x8a0 [smc] process_one_work+0x1b8/0x420 worker_thread+0x158/0x510 kthread+0x114/0x118 It is caused by repeated release of LGR/link refcnt. One suspect is that smc_conn_free() is called repeatedly because some smc_conn_free() from server listening path are not protected by sock lock. e.g. Calls under socklock | smc_listen_work ------------------------------------------------------- lock_sock(sk) | smc_conn_abort smc_conn_free | \- smc_conn_free \- smcr_link_put | \- smcr_link_put (duplicated) release_sock(sk) So here add sock lock protection in smc_listen_work() path, making it exclusive with other connection operations. Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Co-developed-by: Guangguan Wang Signed-off-by: Guangguan Wang Co-developed-by: Kai Signed-off-by: Kai Signed-off-by: Wen Gu Reviewed-by: Wenjia Zhang Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ed6d4d520bc7a..9e6c69d18581c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1900,6 +1900,7 @@ static void smc_listen_out(struct smc_sock *new_smc) if (tcp_sk(new_smc->clcsock->sk)->syn_smc) atomic_dec(&lsmc->queued_smc_hs); + release_sock(newsmcsk); /* lock in smc_listen_work() */ if (lsmc->sk.sk_state == SMC_LISTEN) { lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); @@ -2421,6 +2422,7 @@ static void smc_listen_work(struct work_struct *work) u8 accept_version; int rc = 0; + lock_sock(&new_smc->sk); /* release in smc_listen_out() */ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) return smc_listen_out_err(new_smc); From 7a0ea70da56ee8c2716d0b79e9959d3c47efab62 Mon Sep 17 00:00:00 2001 From: Louis Leseur Date: Thu, 28 Nov 2024 09:33:58 +0100 Subject: [PATCH 199/366] net/qed: allow old cards not supporting "num_images" to work Commit 43645ce03e00 ("qed: Populate nvm image attribute shadow.") added support for populating flash image attributes, notably "num_images". However, some cards were not able to return this information. In such cases, the driver would return EINVAL, causing the driver to exit. Add check to return EOPNOTSUPP instead of EINVAL when the card is not able to return these information. The caller function already handles EOPNOTSUPP without error. Fixes: 43645ce03e00 ("qed: Populate nvm image attribute shadow.") Co-developed-by: Florian Forestier Signed-off-by: Florian Forestier Signed-off-by: Louis Leseur Link: https://patch.msgid.link/20241128083633.26431-1-louis.leseur@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 26a714bfad4ec..b45efc272fdbb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3301,7 +3301,9 @@ int qed_mcp_bist_nvm_get_num_images(struct qed_hwfn *p_hwfn, if (rc) return rc; - if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)) + if (((rsp & FW_MSG_CODE_MASK) == FW_MSG_CODE_UNSUPPORTED)) + rc = -EOPNOTSUPP; + else if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)) rc = -EINVAL; return rc; From 48327566769a6ff2e873b6bf075392bd756625ca Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 29 Nov 2024 13:25:19 -0800 Subject: [PATCH 200/366] rtnetlink: fix double call of rtnl_link_get_net_ifla() Currently rtnl_link_get_net_ifla() gets called twice when we create peer devices, once in rtnl_add_peer_net() and once in each ->newlink() implementation. This looks safer, however, it leads to a classic Time-of-Check to Time-of-Use (TOCTOU) bug since IFLA_NET_NS_PID is very dynamic. And because of the lack of checking error pointer of the second call, it also leads to a kernel crash as reported by syzbot. Fix this by getting rid of the second call, which already becomes redudant after Kuniyuki's work. We have to propagate the result of the first rtnl_link_get_net_ifla() down to each ->newlink(). Reported-by: syzbot+21ba4d5adff0b6a7cfc6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=21ba4d5adff0b6a7cfc6 Fixes: 0eb87b02a705 ("veth: Set VETH_INFO_PEER to veth_link_ops.peer_type.") Fixes: 6b84e558e95d ("vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type.") Fixes: fefd5d082172 ("netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type.") Cc: Kuniyuki Iwashima Signed-off-by: Cong Wang Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241129212519.825567-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/can/vxcan.c | 10 ++-------- drivers/net/netkit.c | 11 +++-------- drivers/net/veth.c | 12 +++-------- net/core/rtnetlink.c | 44 +++++++++++++++++++++-------------------- 4 files changed, 31 insertions(+), 46 deletions(-) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index da7c72105fb6e..ca88119410852 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -172,13 +172,12 @@ static void vxcan_setup(struct net_device *dev) /* forward declaration for rtnl_create_link() */ static struct rtnl_link_ops vxcan_link_ops; -static int vxcan_newlink(struct net *net, struct net_device *dev, +static int vxcan_newlink(struct net *peer_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct vxcan_priv *priv; struct net_device *peer; - struct net *peer_net; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb; char ifname[IFNAMSIZ]; @@ -203,20 +202,15 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, name_assign_type = NET_NAME_ENUM; } - peer_net = rtnl_link_get_net(net, tbp); peer = rtnl_create_link(peer_net, ifname, name_assign_type, &vxcan_link_ops, tbp, extack); - if (IS_ERR(peer)) { - put_net(peer_net); + if (IS_ERR(peer)) return PTR_ERR(peer); - } if (ifmp && dev->ifindex) peer->ifindex = ifmp->ifi_index; err = register_netdevice(peer); - put_net(peer_net); - peer_net = NULL; if (err < 0) { free_netdev(peer); return err; diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index bb07725d1c72b..c1d881dc6409a 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -327,7 +327,7 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], static struct rtnl_link_ops netkit_link_ops; -static int netkit_new_link(struct net *src_net, struct net_device *dev, +static int netkit_new_link(struct net *peer_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { @@ -342,7 +342,6 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, struct net_device *peer; char ifname[IFNAMSIZ]; struct netkit *nk; - struct net *net; int err; if (data) { @@ -385,13 +384,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) return -EOPNOTSUPP; - net = rtnl_link_get_net(src_net, tbp); - peer = rtnl_create_link(net, ifname, ifname_assign_type, + peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, &netkit_link_ops, tbp, extack); - if (IS_ERR(peer)) { - put_net(net); + if (IS_ERR(peer)) return PTR_ERR(peer); - } netif_inherit_tso_max(peer, dev); @@ -408,7 +404,6 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, bpf_mprog_bundle_init(&nk->bundle); err = register_netdevice(peer); - put_net(net); if (err < 0) goto err_register_peer; netif_carrier_off(peer); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0d6d0d749d440..07ebb800edf17 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1765,7 +1765,7 @@ static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int veth_newlink(struct net *src_net, struct net_device *dev, +static int veth_newlink(struct net *peer_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { @@ -1776,7 +1776,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; unsigned char name_assign_type; struct ifinfomsg *ifmp; - struct net *net; /* * create and register peer first @@ -1800,13 +1799,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, name_assign_type = NET_NAME_ENUM; } - net = rtnl_link_get_net(src_net, tbp); - peer = rtnl_create_link(net, ifname, name_assign_type, + peer = rtnl_create_link(peer_net, ifname, name_assign_type, &veth_link_ops, tbp, extack); - if (IS_ERR(peer)) { - put_net(net); + if (IS_ERR(peer)) return PTR_ERR(peer); - } if (!ifmp || !tbp[IFLA_ADDRESS]) eth_hw_addr_random(peer); @@ -1817,8 +1813,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, netif_inherit_tso_max(peer, dev); err = register_netdevice(peer); - put_net(net); - net = NULL; if (err < 0) goto err_register_peer; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 58df76fe408a4..ab5f201bf0ab4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3746,6 +3746,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, struct net *tgt_net, struct net *link_net, + struct net *peer_net, const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3776,8 +3777,13 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, dev->ifindex = ifm->ifi_index; + if (link_net) + net = link_net; + if (peer_net) + net = peer_net; + if (ops->newlink) - err = ops->newlink(link_net ? : net, dev, tb, data, extack); + err = ops->newlink(net, dev, tb, data, extack); else err = register_netdevice(dev); if (err < 0) { @@ -3812,40 +3818,33 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } -static int rtnl_add_peer_net(struct rtnl_nets *rtnl_nets, - const struct rtnl_link_ops *ops, - struct nlattr *data[], - struct netlink_ext_ack *extack) +static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops, + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_MAX + 1]; - struct net *net; int err; if (!data || !data[ops->peer_type]) - return 0; + return NULL; err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack); if (err < 0) - return err; + return ERR_PTR(err); if (ops->validate) { err = ops->validate(tb, NULL, extack); if (err < 0) - return err; + return ERR_PTR(err); } - net = rtnl_link_get_net_ifla(tb); - if (IS_ERR(net)) - return PTR_ERR(net); - if (net) - rtnl_nets_add(rtnl_nets, net); - - return 0; + return rtnl_link_get_net_ifla(tb); } static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, const struct rtnl_link_ops *ops, struct net *tgt_net, struct net *link_net, + struct net *peer_net, struct rtnl_newlink_tbs *tbs, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3894,14 +3893,15 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EOPNOTSUPP; } - return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, nlh, tb, data, extack); + return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, peer_net, nlh, + tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct net *tgt_net, *link_net = NULL, *peer_net = NULL; struct nlattr **tb, **linkinfo, **data = NULL; - struct net *tgt_net, *link_net = NULL; struct rtnl_link_ops *ops = NULL; struct rtnl_newlink_tbs *tbs; struct rtnl_nets rtnl_nets; @@ -3971,9 +3971,11 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } if (ops->peer_type) { - ret = rtnl_add_peer_net(&rtnl_nets, ops, data, extack); - if (ret < 0) + peer_net = rtnl_get_peer_net(ops, data, extack); + if (IS_ERR(peer_net)) goto put_ops; + if (peer_net) + rtnl_nets_add(&rtnl_nets, peer_net); } } @@ -4004,7 +4006,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } rtnl_nets_lock(&rtnl_nets); - ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack); + ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, peer_net, tbs, data, extack); rtnl_nets_unlock(&rtnl_nets); put_net: From af8edaeddbc52e53207d859c912b017fd9a77629 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Dec 2024 10:05:58 +0000 Subject: [PATCH 201/366] net: hsr: must allocate more bytes for RedBox support Blamed commit forgot to change hsr_init_skb() to allocate larger skb for RedBox case. Indeed, send_hsr_supervision_frame() will add two additional components (struct hsr_sup_tlv and struct hsr_sup_payload) syzbot reported the following crash: skbuff: skb_over_panic: text:ffffffff8afd4b0a len:34 put:6 head:ffff88802ad29e00 data:ffff88802ad29f22 tail:0x144 end:0x140 dev:gretap0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:206 ! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 2 UID: 0 PID: 7611 Comm: syz-executor Not tainted 6.12.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:skb_panic+0x157/0x1d0 net/core/skbuff.c:206 Code: b6 04 01 84 c0 74 04 3c 03 7e 21 8b 4b 70 41 56 45 89 e8 48 c7 c7 a0 7d 9b 8c 41 57 56 48 89 ee 52 4c 89 e2 e8 9a 76 79 f8 90 <0f> 0b 4c 89 4c 24 10 48 89 54 24 08 48 89 34 24 e8 94 76 fb f8 4c RSP: 0018:ffffc90000858ab8 EFLAGS: 00010282 RAX: 0000000000000087 RBX: ffff8880598c08c0 RCX: ffffffff816d3e69 RDX: 0000000000000000 RSI: ffffffff816de786 RDI: 0000000000000005 RBP: ffffffff8c9b91c0 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000302 R11: ffffffff961cc1d0 R12: ffffffff8afd4b0a R13: 0000000000000006 R14: ffff88804b938130 R15: 0000000000000140 FS: 000055558a3d6500(0000) GS:ffff88806a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1295974ff8 CR3: 000000002ab6e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_over_panic net/core/skbuff.c:211 [inline] skb_put+0x174/0x1b0 net/core/skbuff.c:2617 send_hsr_supervision_frame+0x6fa/0x9e0 net/hsr/hsr_device.c:342 hsr_proxy_announce+0x1a3/0x4a0 net/hsr/hsr_device.c:436 call_timer_fn+0x1a0/0x610 kernel/time/timer.c:1794 expire_timers kernel/time/timer.c:1845 [inline] __run_timers+0x6e8/0x930 kernel/time/timer.c:2419 __run_timer_base kernel/time/timer.c:2430 [inline] __run_timer_base kernel/time/timer.c:2423 [inline] run_timer_base+0x111/0x190 kernel/time/timer.c:2439 run_timer_softirq+0x1a/0x40 kernel/time/timer.c:2449 handle_softirqs+0x213/0x8f0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu kernel/softirq.c:637 [inline] irq_exit_rcu+0xbb/0x120 kernel/softirq.c:649 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1049 [inline] sysvec_apic_timer_interrupt+0xa4/0xc0 arch/x86/kernel/apic/apic.c:1049 Fixes: 5055cccfc2d1 ("net: hsr: Provide RedBox support (HSR-SAN)") Reported-by: syzbot+7f4643b267cc680bfa1c@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Lukasz Majewski Link: https://patch.msgid.link/20241202100558.507765-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 31a416ee21ad9..03eadd6c51fd1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -246,20 +246,22 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; -static struct sk_buff *hsr_init_skb(struct hsr_port *master) +static struct sk_buff *hsr_init_skb(struct hsr_port *master, int extra) { struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; int hlen, tlen; + int len; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; + len = sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload); /* skb size is same for PRP/HSR frames, only difference * being, for PRP it is a trailer and for HSR it is a - * header + * header. + * RedBox might use @extra more bytes. */ - skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) + - sizeof(struct hsr_sup_payload) + hlen + tlen); + skb = dev_alloc_skb(len + extra + hlen + tlen); if (!skb) return skb; @@ -295,6 +297,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port, struct hsr_sup_tlv *hsr_stlv; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; + int extra = 0; *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); if (hsr->announce_count < 3 && hsr->prot_version == 0) { @@ -303,7 +306,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port, hsr->announce_count++; } - skb = hsr_init_skb(port); + if (hsr->redbox) + extra = sizeof(struct hsr_sup_tlv) + + sizeof(struct hsr_sup_payload); + + skb = hsr_init_skb(port, extra); if (!skb) { netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n"); return; @@ -362,7 +369,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; - skb = hsr_init_skb(master); + skb = hsr_init_skb(master, 0); if (!skb) { netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; From f58326c70df0dc413bb58848b188523b3662cf0f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 2 Dec 2024 12:25:18 +0100 Subject: [PATCH 202/366] irqchip/gic-v3: Fix irq_complete_ack() comment When the GIC is in EOImode == 1 in irq_complete_ack() it executes a priority drop not a deactivation. Fix the function comment to clarify the behaviour. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241202112518.51178-1-lpieralisi@kernel.org --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 8b6159f4cdafa..34db379d066a5 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -817,7 +817,7 @@ static void gic_deactivate_unhandled(u32 irqnr) * register state is not stale, as these may have been indirectly written * *after* exception entry. * - * (2) Deactivate the interrupt when EOI mode 1 is in use. + * (2) Execute an interrupt priority drop when EOI mode 1 is in use. */ static inline void gic_complete_ack(u32 irqnr) { From ee3878b84cc27ee62cdf78d2842830f4dcdab117 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 2 Dec 2024 12:54:37 +0100 Subject: [PATCH 203/366] irqchip/bcm2836: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND The BCM2836 interrupt controller doesn't provide any facility to configure the wakeup sources. That's the reason why the driver lacks the irq_set_wake() callback for the interrupt chip. Enable the flags IRQCHIP_SKIP_SET_WAKE and IRQCHIP_MASK_ON_SUSPEND so the interrupt suspend logic can handle the chip correctly equivalently to the corresponding bcm2835 change (9a58480e5e53 ("irqchip/bcm2835: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND"). Signed-off-by: Stefan Wahren Signed-off-by: Thomas Gleixner Acked-by: Florian Fainelli Link: https://lore.kernel.org/all/20241202115437.33552-1-wahrenst@gmx.net --- drivers/irqchip/irq-bcm2836.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index e5f1059b989fe..e366257684b58 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -58,6 +58,7 @@ static struct irq_chip bcm2836_arm_irqchip_timer = { .name = "bcm2836-timer", .irq_mask = bcm2836_arm_irqchip_mask_timer_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_timer_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE, }; static void bcm2836_arm_irqchip_mask_pmu_irq(struct irq_data *d) @@ -74,6 +75,7 @@ static struct irq_chip bcm2836_arm_irqchip_pmu = { .name = "bcm2836-pmu", .irq_mask = bcm2836_arm_irqchip_mask_pmu_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_pmu_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE, }; static void bcm2836_arm_irqchip_mask_gpu_irq(struct irq_data *d) @@ -88,6 +90,7 @@ static struct irq_chip bcm2836_arm_irqchip_gpu = { .name = "bcm2836-gpu", .irq_mask = bcm2836_arm_irqchip_mask_gpu_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_gpu_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE, }; static void bcm2836_arm_irqchip_dummy_op(struct irq_data *d) From 7f71507851fc7764b36a3221839607d3a45c2025 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 3 Dec 2024 19:49:28 +0800 Subject: [PATCH 204/366] LoongArch: KVM: Protect kvm_io_bus_{read,write}() with SRCU When we enable lockdep we get such a warning: ============================= WARNING: suspicious RCU usage 6.12.0-rc7+ #1891 Tainted: G W ----------------------------- arch/loongarch/kvm/../../../virt/kvm/kvm_main.c:5945 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by qemu-system-loo/948: #0: 90000001184a00a8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0xf4/0xe20 [kvm] stack backtrace: CPU: 2 UID: 0 PID: 948 Comm: qemu-system-loo Tainted: G W 6.12.0-rc7+ #1891 Tainted: [W]=WARN Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.0-prebeta9 10/21/2022 Stack : 0000000000000089 9000000005a0db9c 90000000071519c8 900000012c578000 900000012c57b940 0000000000000000 900000012c57b948 9000000007e53788 900000000815bcc8 900000000815bcc0 900000012c57b7b0 0000000000000001 0000000000000001 4b031894b9d6b725 0000000005dec000 9000000100427b00 00000000000003d2 0000000000000001 000000000000002d 0000000000000003 0000000000000030 00000000000003b4 0000000005dec000 0000000000000000 900000000806d000 9000000007e53788 00000000000000b4 0000000000000004 0000000000000004 0000000000000000 0000000000000000 9000000107baf600 9000000008916000 9000000007e53788 9000000005924778 000000001fe001e5 00000000000000b0 0000000000000007 0000000000000000 0000000000071c1d ... Call Trace: [<9000000005924778>] show_stack+0x38/0x180 [<90000000071519c4>] dump_stack_lvl+0x94/0xe4 [<90000000059eb754>] lockdep_rcu_suspicious+0x194/0x240 [] kvm_io_bus_read+0x19c/0x1e0 [kvm] [] kvm_emu_mmio_read+0xd8/0x440 [kvm] [] kvm_handle_read_fault+0x3c/0xe0 [kvm] [] kvm_handle_exit+0x228/0x480 [kvm] Fix it by protecting kvm_io_bus_{read,write}() with SRCU. Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 31 +++++++++++++++++++++---------- arch/loongarch/kvm/intc/ipi.c | 6 +++++- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 69f3e3782cc9e..a7893bd01e732 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -156,7 +156,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; unsigned long *val; u32 addr, rd, rj, opcode; @@ -167,7 +167,6 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) rj = inst.reg2_format.rj; opcode = inst.reg2_format.opcode; addr = vcpu->arch.gprs[rj]; - ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; val = &vcpu->arch.gprs[rd]; @@ -207,20 +206,28 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } if (run->iocsr_io.is_write) { - if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save data and let user space to write it */ memcpy(run->iocsr_io.data, val, run->iocsr_io.len); - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { - if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save register id for iocsr read completion */ vcpu->arch.io_gpr = rd; - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -359,7 +366,7 @@ static int kvm_handle_gspr(struct kvm_vcpu *vcpu) int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int op8, opcode, rd; struct kvm_run *run = vcpu->run; @@ -464,8 +471,10 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, &vcpu->arch.gprs[rd]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { update_pc(&vcpu->arch); vcpu->mmio_needed = 0; @@ -531,7 +540,7 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int rd, op8, opcode; unsigned long curr_pc, rd_val = 0; struct kvm_run *run = vcpu->run; @@ -631,7 +640,9 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) return EMULATE_DONE; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index a233a323e2957..93f4acd445236 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -98,7 +98,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { - int i, ret; + int i, idx, ret; uint32_t val = 0, mask = 0; /* @@ -107,7 +107,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) */ if ((data >> 27) & 0xf) { /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); return ret; @@ -121,7 +123,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) val &= mask; } val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); From a07d2d7930c75e6bf88683b376d09ab1f3fed2aa Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Tue, 3 Dec 2024 11:31:05 +0100 Subject: [PATCH 205/366] io_uring: Change res2 parameter type in io_uring_cmd_done Change the type of the res2 parameter in io_uring_cmd_done from ssize_t to u64. This aligns the parameter type with io_req_set_cqe32_extra, which expects u64 arguments. The change eliminates potential issues on 32-bit architectures where ssize_t might be 32-bit. Only user of passing res2 is drivers/nvme/host/ioctl.c and it actually passes u64. Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Cc: stable@vger.kernel.org Reviewed-by: Kanchan Joshi Tested-by: Li Zetao Reviewed-by: Li Zetao Signed-off-by: Bernd Schubert Link: https://lore.kernel.org/r/20241203-io_uring_cmd_done-res2-as-u64-v2-1-5e59ae617151@ddn.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 4 ++-- io_uring/uring_cmd.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 578a3fdf5c719..0d5448c0b86cd 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -43,7 +43,7 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. */ -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -67,7 +67,7 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2, unsigned issue_flags) + u64 ret2, unsigned issue_flags) { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index d9fb2143f56ff..af842e9b4eb97 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -151,7 +151,7 @@ static inline void io_req_set_cqe32_extra(struct io_kiocb *req, * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. */ -void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2, +void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, unsigned issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); From 3c93e4e4a2aeb92ea99e1eac3e1180f5ed49538c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 30 Nov 2024 18:45:21 +0900 Subject: [PATCH 206/366] block: rnull: add missing MODULE_DESCRIPTION Add the missing description to fix the following warning: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/block/rnull_mod.o Signed-off-by: FUJITA Tomonori Acked-by: Andreas Hindborg Link: https://lore.kernel.org/r/20241130094521.193924-1-fujita.tomonori@gmail.com Signed-off-by: Jens Axboe --- drivers/block/rnull.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs index 5de7223beb4d5..9cca05dcf772c 100644 --- a/drivers/block/rnull.rs +++ b/drivers/block/rnull.rs @@ -28,6 +28,7 @@ module! { type: NullBlkModule, name: "rnull_mod", author: "Andreas Hindborg", + description: "Rust implementation of the C null block driver", license: "GPL v2", } From 9d9f204bdf7243bfc2c6a023d63c63f7cbf8ef0b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Dec 2024 11:40:43 +0100 Subject: [PATCH 207/366] genirq/proc: Add missing space separator back The recent conversion of show_interrupts() to seq_put_decimal_ull_width() caused a formatting regression as it drops a previosuly existing space separator. Add it back by unconditionally inserting a space after the interrupt counts and removing the extra leading space from the chip name prints. Fixes: f9ed1f7c2e26 ("genirq/proc: Use seq_put_decimal_ull_width() for decimal values") Reported-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Tested-by: Geert Uytterhoeven Reviewed-by: David Wang <00107082@163.com> Link: https://lore.kernel.org/all/87zfldt5g4.ffs@tglx Closes: https://lore.kernel.org/all/4ce18851-6e9f-bbe-8319-cc5e69fb45c@linux-m68k.org --- kernel/irq/proc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index f36c33bd2da46..8e29809de38de 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -501,17 +501,18 @@ int show_interrupts(struct seq_file *p, void *v) seq_put_decimal_ull_width(p, " ", cnt, 10); } + seq_putc(p, ' '); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); else if (desc->irq_data.chip->name) - seq_printf(p, " %8s", desc->irq_data.chip->name); + seq_printf(p, "%8s", desc->irq_data.chip->name); else - seq_printf(p, " %8s", "-"); + seq_printf(p, "%8s", "-"); } else { - seq_printf(p, " %8s", "None"); + seq_printf(p, "%8s", "None"); } if (desc->irq_data.domain) seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); From 7937a1bf32e31f80032c71511cc24d707753d07d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Nov 2024 19:51:59 -0800 Subject: [PATCH 208/366] iommufd: Fix typos in kernel-doc comments Fix typos/spellos in kernel-doc comments for readability. Fixes: aad37e71d5c4 ("iommufd: IOCTLs for the io_pagetable") Fixes: b7a0855eb95f ("iommu: Add new flag to explictly request PASID capable domain") Fixes: d68beb276ba2 ("iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object") Link: https://patch.msgid.link/r/20241128035159.374624-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 059b6537f2b7b..d7ce322fb93a5 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -297,7 +297,7 @@ struct iommu_ioas_unmap { * ioctl(IOMMU_OPTION_HUGE_PAGES) * @IOMMU_OPTION_RLIMIT_MODE: * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege - * to invoke this. Value 0 (default) is user based accouting, 1 uses process + * to invoke this. Value 0 (default) is user based accounting, 1 uses process * based accounting. Global option, object_id must be 0 * @IOMMU_OPTION_HUGE_PAGES: * Value 1 (default) allows contiguous pages to be combined when generating @@ -390,7 +390,7 @@ struct iommu_vfio_ioas { * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The * domain can be attached to any PASID on the device. * Any domain attached to the non-PASID part of the - * device must also be flaged, otherwise attaching a + * device must also be flagged, otherwise attaching a * PASID will blocked. * If IOMMU does not support PASID it will return * error (-EOPNOTSUPP). @@ -766,7 +766,7 @@ struct iommu_hwpt_vtd_s1_invalidate { }; /** - * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation + * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. * Must be little-endian. From af7f4780514f850322b2959032ecaa96e4b26472 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 3 Dec 2024 00:02:54 -0800 Subject: [PATCH 209/366] iommufd: Fix out_fput in iommufd_fault_alloc() As fput() calls the file->f_op->release op, where fault obj and ictx are getting released, there is no need to release these two after fput() one more time, which would result in imbalanced refcounts: refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 48 PID: 2369 at lib/refcount.c:31 refcount_warn_saturate+0x60/0x230 Call trace: refcount_warn_saturate+0x60/0x230 (P) refcount_warn_saturate+0x60/0x230 (L) iommufd_fault_fops_release+0x9c/0xe0 [iommufd] ... VFS: Close: file count is 0 (f_op=iommufd_fops [iommufd]) WARNING: CPU: 48 PID: 2369 at fs/open.c:1507 filp_flush+0x3c/0xf0 Call trace: filp_flush+0x3c/0xf0 (P) filp_flush+0x3c/0xf0 (L) __arm64_sys_close+0x34/0x98 ... imbalanced put on file reference count WARNING: CPU: 48 PID: 2369 at fs/file.c:74 __file_ref_put+0x100/0x138 Call trace: __file_ref_put+0x100/0x138 (P) __file_ref_put+0x100/0x138 (L) __fput_sync+0x4c/0xd0 Drop those two lines to fix the warnings above. Cc: stable@vger.kernel.org Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Link: https://patch.msgid.link/r/b5651beb3a6b1adeef26fffac24607353bf67ba1.1733212723.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 053b0e30f55a9..1fe804e28a866 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -420,8 +420,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) put_unused_fd(fdno); out_fput: fput(filep); - refcount_dec(&fault->obj.users); - iommufd_ctx_put(fault->ictx); out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); From a8c9df25f90e5155b70f5e2474c7299841e3335a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 3 Dec 2024 00:02:55 -0800 Subject: [PATCH 210/366] iommufd/selftest: Cover IOMMU_FAULT_QUEUE_ALLOC in iommufd_fail_nth This was missing in the series introducing the fault object. Thus, add it. Link: https://patch.msgid.link/r/d61b9b7f73276cc8f1aef9602bd35c486917506e.1733212723.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_fail_nth.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 22f6fd5f0f741..64b1f8e1b0cf1 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -615,7 +615,12 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) /* device.c */ TEST_FAIL_NTH(basic_fail_nth, device) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; struct iommu_test_hw_info info; + uint32_t fault_id, fault_fd; + uint32_t fault_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; uint32_t stdev_id; @@ -678,6 +683,15 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) return -1; + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) + return -1; + close(fault_fd); + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &fault_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data))) + return -1; + return 0; } From 62aa6f2ede976dfb3539e59ee55c62cfd3c3faa3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Dec 2024 19:21:05 +0900 Subject: [PATCH 211/366] scripts/nsdeps: get 'make nsdeps' working again Since commit cdd30ebb1b9f ("module: Convert symbol namespace to string literal"), when MODULE_IMPORT_NS() is missing, 'make nsdeps' inserts pointless code: MODULE_IMPORT_NS("ns"); Here, "ns" is not a namespace, but the variable in the semantic patch. It must not be quoted. Instead, a string literal must be passed to Coccinelle. Fixes: cdd30ebb1b9f ("module: Convert symbol namespace to string literal") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Torvalds --- scripts/coccinelle/misc/add_namespace.cocci | 4 ++-- scripts/nsdeps | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci index d66c4e3cd9468..cbf1614163cb8 100644 --- a/scripts/coccinelle/misc/add_namespace.cocci +++ b/scripts/coccinelle/misc/add_namespace.cocci @@ -13,7 +13,7 @@ virtual report declarer name MODULE_IMPORT_NS; identifier virtual.ns; @@ -MODULE_IMPORT_NS("ns"); +MODULE_IMPORT_NS(ns); // Add missing imports, but only adjacent to a MODULE_LICENSE statement. // That ensures we are adding it only to the main module source file. @@ -23,7 +23,7 @@ expression license; identifier virtual.ns; @@ MODULE_LICENSE(license); -+ MODULE_IMPORT_NS("ns"); ++ MODULE_IMPORT_NS(ns); // Dummy rule for report mode that would otherwise be empty and make spatch // fail ("No rules apply.") diff --git a/scripts/nsdeps b/scripts/nsdeps index bab4ec870e505..a3372166ac01b 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -21,7 +21,7 @@ fi generate_deps_for_ns() { $SPATCH --very-quiet --in-place --sp-file \ - $srctree/scripts/coccinelle/misc/add_namespace.cocci -D nsdeps -D ns=$1 $2 + $srctree/scripts/coccinelle/misc/add_namespace.cocci -D nsdeps -D ns=\"$1\" $2 } generate_deps() { From 3727b1a7ca23050f8c7fe3d6a6884fc8038b8336 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Dec 2024 19:21:06 +0900 Subject: [PATCH 212/366] doc: module: revert misconversions for MODULE_IMPORT_NS() This reverts the misconversions introduced by commit cdd30ebb1b9f ("module: Convert symbol namespace to string literal"). The affected descriptions refer to MODULE_IMPORT_NS() tags in general, rather than suggesting the use of the empty string ("") as the namespace. Fixes: cdd30ebb1b9f ("module: Convert symbol namespace to string literal") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Torvalds --- Documentation/core-api/symbol-namespaces.rst | 4 ++-- .../translations/it_IT/core-api/symbol-namespaces.rst | 4 ++-- .../translations/zh_CN/core-api/symbol-namespaces.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index d04639fd8a41c..55886b4e6aea5 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -106,7 +106,7 @@ inspected with modinfo:: [...] -It is advisable to add the MODULE_IMPORT_NS("") statement close to other module +It is advisable to add the MODULE_IMPORT_NS() statement close to other module metadata definitions like MODULE_AUTHOR() or MODULE_LICENSE(). Refer to section 5. for a way to create missing import statements automatically. @@ -128,7 +128,7 @@ enable loading regardless, but will emit a warning. Missing namespaces imports can easily be detected at build time. In fact, modpost will emit a warning if a module uses a symbol from a namespace without importing it. -MODULE_IMPORT_NS("") statements will usually be added at a definite location +MODULE_IMPORT_NS() statements will usually be added at a definite location (along with other module meta data). To make the life of module authors (and subsystem maintainers) easier, a script and make target is available to fixup missing imports. Fixing missing imports can be done with:: diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst index 55a7978c662b2..df152c2c55db0 100644 --- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst +++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst @@ -107,7 +107,7 @@ modinfo:: [...] -Si consiglia di posizionare la dichiarazione MODULE_IMPORT_NS("") vicino +Si consiglia di posizionare la dichiarazione MODULE_IMPORT_NS() vicino ai metadati del modulo come MODULE_AUTHOR() o MODULE_LICENSE(). Fate riferimento alla sezione 5. per creare automaticamente le importazioni mancanti. @@ -131,7 +131,7 @@ emetterà un avviso. La mancanza di un'importazione può essere individuata facilmente al momento della compilazione. Infatti, modpost emetterà un avviso se il modulo usa un simbolo da uno spazio dei nomi che non è stato importato. -La dichiarazione MODULE_IMPORT_NS("") viene solitamente aggiunta in un posto +La dichiarazione MODULE_IMPORT_NS() viene solitamente aggiunta in un posto ben definito (assieme agli altri metadati del modulo). Per facilitare la vita di chi scrive moduli (e i manutentori di sottosistemi), esistono uno script e un target make per correggere le importazioni mancanti. Questo può diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst index 5e698f58b3fec..fc7f3797dceef 100644 --- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst +++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst @@ -99,7 +99,7 @@ [...] -建议将 MODULE_IMPORT_NS("") 语句添加到靠近其他模块元数据定义的地方, +建议将 MODULE_IMPORT_NS() 语句添加到靠近其他模块元数据定义的地方, 如 MODULE_AUTHOR() 或 MODULE_LICENSE() 。关于自动创建缺失的导入 语句的方法,请参考第5节。 @@ -118,7 +118,7 @@ EINVAL方式失败。要允许加载不满足这个前提条件的模块,可 缺少命名空间的导入可以在构建时很容易被检测到。事实上,如果一个模块 使用了一个命名空间的符号而没有导入它,modpost会发出警告。 -MODULE_IMPORT_NS("")语句通常会被添加到一个明确的位置(和其他模块元 +MODULE_IMPORT_NS()语句通常会被添加到一个明确的位置(和其他模块元 数据一起)。为了使模块作者(和子系统维护者)的生活更加轻松,我们提 供了一个脚本和make目标来修复丢失的导入。修复丢失的导入可以用:: From ceb8bf2ceaa77fe222fe8fe32cb7789c9099ddf1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Dec 2024 19:21:07 +0900 Subject: [PATCH 213/366] module: Convert default symbol namespace to string literal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit cdd30ebb1b9f ("module: Convert symbol namespace to string literal") only converted MODULE_IMPORT_NS() and EXPORT_SYMBOL_NS(), leaving DEFAULT_SYMBOL_NAMESPACE as a macro expansion. This commit converts DEFAULT_SYMBOL_NAMESPACE in the same way to avoid annoyance for the default namespace as well. Signed-off-by: Masahiro Yamada Reviewed-by: Uwe Kleine-König Signed-off-by: Linus Torvalds --- Documentation/core-api/symbol-namespaces.rst | 4 ++-- .../translations/it_IT/core-api/symbol-namespaces.rst | 4 ++-- .../translations/zh_CN/core-api/symbol-namespaces.rst | 4 ++-- drivers/cdx/Makefile | 2 +- drivers/crypto/intel/iaa/Makefile | 2 +- drivers/crypto/intel/qat/qat_common/Makefile | 2 +- drivers/dma/idxd/Makefile | 2 +- drivers/gpio/gpio-idio-16.c | 2 +- drivers/hwmon/nct6775-core.c | 2 +- drivers/i2c/busses/i2c-designware-common.c | 2 +- drivers/i2c/busses/i2c-designware-master.c | 2 +- drivers/i2c/busses/i2c-designware-slave.c | 2 +- drivers/pwm/core.c | 2 +- drivers/pwm/pwm-dwc-core.c | 2 +- drivers/pwm/pwm-lpss.c | 2 +- drivers/tty/serial/sc16is7xx.c | 2 +- drivers/usb/storage/Makefile | 2 +- include/linux/export.h | 2 +- 18 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 55886b4e6aea5..27a9cccc792c1 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -68,7 +68,7 @@ is to define the default namespace in the ``Makefile`` of the subsystem. E.g. to export all symbols defined in usb-common into the namespace USB_COMMON, add a line like this to drivers/usb/common/Makefile:: - ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON + ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"' That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will @@ -79,7 +79,7 @@ A second option to define the default namespace is directly in the compilation unit as preprocessor statement. The above example would then read:: #undef DEFAULT_SYMBOL_NAMESPACE - #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON + #define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON" within the corresponding compilation unit before any EXPORT_SYMBOL macro is used. diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst index df152c2c55db0..6ee7139885316 100644 --- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst +++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst @@ -69,7 +69,7 @@ Per esempio per esportare tutti i simboli definiti in usb-common nello spazio dei nomi USB_COMMON, si può aggiungere la seguente linea in drivers/usb/common/Makefile:: - ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON + ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"' Questo cambierà tutte le macro EXPORT_SYMBOL() ed EXPORT_SYMBOL_GPL(). Invece, un simbolo esportato con EXPORT_SYMBOL_NS() non verrà cambiato e il simbolo @@ -79,7 +79,7 @@ Una seconda possibilità è quella di definire il simbolo di preprocessore direttamente nei file da compilare. L'esempio precedente diventerebbe:: #undef DEFAULT_SYMBOL_NAMESPACE - #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON + #define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON" Questo va messo prima di un qualsiasi uso di EXPORT_SYMBOL. diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst index fc7f3797dceef..b1bec219912d5 100644 --- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst +++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst @@ -66,7 +66,7 @@ 子系统的 ``Makefile`` 中定义默认命名空间。例如,如果要将usb-common中定义的所有符号导 出到USB_COMMON命名空间,可以在drivers/usb/common/Makefile中添加这样一行:: - ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON + ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"' 这将影响所有 EXPORT_SYMBOL() 和 EXPORT_SYMBOL_GPL() 语句。当这个定义存在时, 用EXPORT_SYMBOL_NS()导出的符号仍然会被导出到作为命名空间参数传递的命名空间中, @@ -76,7 +76,7 @@ 成:: #undef DEFAULT_SYMBOL_NAMESPACE - #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON + #define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON" 应置于相关编译单元中任何 EXPORT_SYMBOL 宏之前 diff --git a/drivers/cdx/Makefile b/drivers/cdx/Makefile index 749a3295c2bdc..3ca7068a30525 100644 --- a/drivers/cdx/Makefile +++ b/drivers/cdx/Makefile @@ -5,7 +5,7 @@ # Copyright (C) 2022-2023, Advanced Micro Devices, Inc. # -ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CDX_BUS +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"CDX_BUS"' obj-$(CONFIG_CDX_BUS) += cdx.o controller/ diff --git a/drivers/crypto/intel/iaa/Makefile b/drivers/crypto/intel/iaa/Makefile index b64b208d23440..55bda7770fac7 100644 --- a/drivers/crypto/intel/iaa/Makefile +++ b/drivers/crypto/intel/iaa/Makefile @@ -3,7 +3,7 @@ # Makefile for IAA crypto device drivers # -ccflags-y += -I $(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE=IDXD +ccflags-y += -I $(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE='"IDXD"' obj-$(CONFIG_CRYPTO_DEV_IAA_CRYPTO) := iaa_crypto.o diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index eac73cbfdd38e..7acf9c576149b 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o -ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CRYPTO_QAT +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"CRYPTO_QAT"' intel_qat-objs := adf_cfg.o \ adf_isr.o \ adf_ctl_drv.o \ diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 2b4a0d406e1e7..9ff9d7b87b649 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,4 +1,4 @@ -ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"IDXD"' obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o idxd_bus-y := bus.o diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c index 53b1eb876a125..2c95125892972 100644 --- a/drivers/gpio/gpio-idio-16.c +++ b/drivers/gpio/gpio-idio-16.c @@ -14,7 +14,7 @@ #include "gpio-idio-16.h" -#define DEFAULT_SYMBOL_NAMESPACE GPIO_IDIO_16 +#define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" #define IDIO_16_DAT_BASE 0x0 #define IDIO_16_OUT_BASE IDIO_16_DAT_BASE diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index ee04795b98aab..c243b51837d2f 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -57,7 +57,7 @@ #include "nct6775.h" #undef DEFAULT_SYMBOL_NAMESPACE -#define DEFAULT_SYMBOL_NAMESPACE HWMON_NCT6775 +#define DEFAULT_SYMBOL_NAMESPACE "HWMON_NCT6775" #define USE_ALTERNATE diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 857783d458fbd..183a35038eef9 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -29,7 +29,7 @@ #include #include -#define DEFAULT_SYMBOL_NAMESPACE I2C_DW_COMMON +#define DEFAULT_SYMBOL_NAMESPACE "I2C_DW_COMMON" #include "i2c-designware-core.h" diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 69705cc7e6072..c8cbe5b1aeb19 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -22,7 +22,7 @@ #include #include -#define DEFAULT_SYMBOL_NAMESPACE I2C_DW +#define DEFAULT_SYMBOL_NAMESPACE "I2C_DW" #include "i2c-designware-core.h" diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index a98dcddacecef..dc2b788eac5bd 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -16,7 +16,7 @@ #include #include -#define DEFAULT_SYMBOL_NAMESPACE I2C_DW +#define DEFAULT_SYMBOL_NAMESPACE "I2C_DW" #include "i2c-designware-core.h" diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 9c733877e98e4..675b252d9c8ce 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -6,7 +6,7 @@ * Copyright (C) 2011-2012 Avionic Design GmbH */ -#define DEFAULT_SYMBOL_NAMESPACE PWM +#define DEFAULT_SYMBOL_NAMESPACE "PWM" #include #include diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c index c8425493b95d8..6dabec93a3c64 100644 --- a/drivers/pwm/pwm-dwc-core.c +++ b/drivers/pwm/pwm-dwc-core.c @@ -9,7 +9,7 @@ * Author: Raymond Tan */ -#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm +#define DEFAULT_SYMBOL_NAMESPACE "dwc_pwm" #include #include diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 867e2bc8c601c..3b99feb3bb491 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -19,7 +19,7 @@ #include #include -#define DEFAULT_SYMBOL_NAMESPACE PWM_LPSS +#define DEFAULT_SYMBOL_NAMESPACE "PWM_LPSS" #include "pwm-lpss.h" diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 9d0c971e49f59..a3093e09309ff 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -8,7 +8,7 @@ */ #undef DEFAULT_SYMBOL_NAMESPACE -#define DEFAULT_SYMBOL_NAMESPACE SERIAL_NXP_SC16IS7XX +#define DEFAULT_SYMBOL_NAMESPACE "SERIAL_NXP_SC16IS7XX" #include #include diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile index 46635fa4a3405..28db337f190bf 100644 --- a/drivers/usb/storage/Makefile +++ b/drivers/usb/storage/Makefile @@ -8,7 +8,7 @@ ccflags-y := -I $(srctree)/drivers/scsi -ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_STORAGE +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_STORAGE"' obj-$(CONFIG_USB_UAS) += uas.o obj-$(CONFIG_USB_STORAGE) += usb-storage.o diff --git a/include/linux/export.h b/include/linux/export.h index f5f3950a1e42f..2633df4d31e62 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -60,7 +60,7 @@ #endif #ifdef DEFAULT_SYMBOL_NAMESPACE -#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) +#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, DEFAULT_SYMBOL_NAMESPACE) #else #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, "") #endif From 9151299ee5101e03eeed544c1280b0e14b89a8a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Dec 2024 17:27:40 +0100 Subject: [PATCH 214/366] irqchip/stm32mp-exti: CONFIG_STM32MP_EXTI should not default to y when compile-testing Merely enabling compile-testing should not enable additional functionality. Fixes: 0be58e0553812fcb ("irqchip/stm32mp-exti: Allow building as module") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/ef5ec063b23522058f92087e072419ea233acfe9.1733243115.git.geert+renesas@glider.be --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 55d7122121e28..9bee02db16439 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -415,7 +415,7 @@ config PARTITION_PERCPU config STM32MP_EXTI tristate "STM32MP extended interrupts and event controller" depends on (ARCH_STM32 && !ARM_SINGLE_ARMV7M) || COMPILE_TEST - default y + default ARCH_STM32 && !ARM_SINGLE_ARMV7M select IRQ_DOMAIN_HIERARCHY select GENERIC_IRQ_CHIP help From 56a708742a8bf127eb66798bfc9c9516c61f9930 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 25 Nov 2024 09:16:50 -0800 Subject: [PATCH 215/366] arm64: mm: Fix zone_dma_limit calculation Commit ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") and subsequent patches changed how zone_dma_limit is calculated to allow a reduced ZONE_DMA even when RAM starts above 4GB. Commit 122c234ef4e1 ("arm64: mm: keep low RAM dma zone") further fixed this to ensure ZONE_DMA remains below U32_MAX if RAM starts below 4GB, especially on platforms that do not have IORT or DT description of the device DMA ranges. While zone boundaries calculation was fixed by the latter commit, zone_dma_limit, used to determine the GFP_DMA flag in the core code, was not updated. This results in excessive use of GFP_DMA and unnecessary ZONE_DMA allocations on some platforms. Update zone_dma_limit to match the actual upper bound of ZONE_DMA. Fixes: ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") Cc: # 6.12.x Reported-by: Yutang Jiang Tested-by: Yutang Jiang Signed-off-by: Yang Shi Link: https://lore.kernel.org/r/20241125171650.77424-1-yang@os.amperecomputing.com [catalin.marinas@arm.com: some tweaking of the commit log] Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d21f67d67cf5f..ccdef53872a0b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -117,15 +117,6 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - /** - * Information we get from firmware (e.g. DT dma-ranges) describe DMA - * bus constraints. Devices using DMA might have their own limitations. - * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM - * DMA zone on platforms that have RAM there. - */ - if (memblock_start_of_DRAM() < U32_MAX) - zone_limit = min(zone_limit, U32_MAX); - return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } @@ -141,6 +132,14 @@ static void __init zone_sizes_init(void) acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address(); dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL); zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit); + /* + * Information we get from firmware (e.g. DT dma-ranges) describe DMA + * bus constraints. Devices using DMA might have their own limitations. + * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM + * DMA zone on platforms that have RAM there. + */ + if (memblock_start_of_DRAM() < U32_MAX) + zone_dma_limit = min(zone_dma_limit, U32_MAX); arm64_dma_phys_limit = max_zone_phys(zone_dma_limit); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif From 2ca704f55e22b7b00cc7025953091af3c82fa5c0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2024 14:51:36 -0400 Subject: [PATCH 216/366] iommu/arm-smmu-v3: Improve uAPI comment for IOMMU_HW_INFO_TYPE_ARM_SMMUV3 Be specific about what fields should be accessed in the idr result and give other guidance to the VMM on how it should generate the vIDR. Discussion on the list, and review of the qemu implementation understood this needs to be clearer and more detailed. Link: https://patch.msgid.link/r/0-v1-191e5e24cec3+3b0-iommufd_smmuv3_hwinf_jgg@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index d7ce322fb93a5..34810f6ae2b5a 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -558,16 +558,25 @@ struct iommu_hw_info_vtd { * For the details of @idr, @iidr and @aidr, please refer to the chapters * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. * - * User space should read the underlying ARM SMMUv3 hardware information for - * the list of supported features. - * - * Note that these values reflect the raw HW capability, without any insight if - * any required kernel driver support is present. Bits may be set indicating the - * HW has functionality that is lacking kernel software support, such as BTM. If - * a VMM is using this information to construct emulated copies of these - * registers it should only forward bits that it knows it can support. - * - * In future, presence of required kernel support will be indicated in flags. + * This reports the raw HW capability, and not all bits are meaningful to be + * read by userspace. Only the following fields should be used: + * + * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN , CD2L, ASID16, TTF + * idr[1]: SIDSIZE, SSIDSIZE + * idr[3]: BBML, RIL + * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K + * + * - S1P should be assumed to be true if a NESTED HWPT can be created + * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be + * true. + * - ATS is a per-device property. If the VMM describes any devices as ATS + * capable in ACPI/DT it should set the corresponding idr. + * + * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is + * important that VMMs do not read bits outside the list to allow for + * compatibility with future kernels. Several features in the SMMUv3 + * architecture are not currently supported by the kernel for nesting: HTTU, + * BTM, MPAM and others. */ struct iommu_hw_info_arm_smmuv3 { __u32 flags; From 8d09e2d569f6e34301387f24433b42062517ca85 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Dec 2024 17:03:59 +0000 Subject: [PATCH 217/366] arm64: patching: avoid early page_to_phys() When arm64 is configured with CONFIG_DEBUG_VIRTUAL=y, a warning is printed from the patching code because patch_map(), e.g. | ------------[ cut here ]------------ | WARNING: CPU: 0 PID: 0 at arch/arm64/kernel/patching.c:45 patch_map.constprop.0+0x120/0xd00 | CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.13.0-rc1-00002-ge1a5d6c6be55 #1 | Hardware name: linux,dummy-virt (DT) | pstate: 800003c5 (Nzcv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : patch_map.constprop.0+0x120/0xd00 | lr : patch_map.constprop.0+0x120/0xd00 | sp : ffffa9bb312a79a0 | x29: ffffa9bb312a79a0 x28: 0000000000000001 x27: 0000000000000001 | x26: 0000000000000000 x25: 0000000000000000 x24: 00000000000402e8 | x23: ffffa9bb2c94c1c8 x22: ffffa9bb2c94c000 x21: ffffa9bb222e883c | x20: 0000000000000002 x19: ffffc1ffc100ba40 x18: ffffa9bb2cf0f21c | x17: 0000000000000006 x16: 0000000000000000 x15: 0000000000000004 | x14: 1ffff5376625b4ac x13: ffff753766a67fb8 x12: ffff753766919cd1 | x11: 0000000000000003 x10: 1ffff5376625b4c3 x9 : 1ffff5376625b4af | x8 : ffff753766254f0a x7 : 0000000041b58ab3 x6 : ffff753766254f18 | x5 : ffffa9bb312d9bc0 x4 : 0000000000000000 x3 : ffffa9bb29bd90e4 | x2 : 0000000000000002 x1 : ffffa9bb312d9bc0 x0 : 0000000000000000 | Call trace: | patch_map.constprop.0+0x120/0xd00 (P) | patch_map.constprop.0+0x120/0xd00 (L) | __aarch64_insn_write+0xa8/0x120 | aarch64_insn_patch_text_nosync+0x4c/0xb8 | arch_jump_label_transform_queue+0x7c/0x100 | jump_label_update+0x154/0x460 | static_key_enable_cpuslocked+0x1d8/0x280 | static_key_enable+0x2c/0x48 | early_randomize_kstack_offset+0x104/0x168 | do_early_param+0xe4/0x148 | parse_args+0x3a4/0x838 | parse_early_options+0x50/0x68 | parse_early_param+0x58/0xe0 | setup_arch+0x78/0x1f0 | start_kernel+0xa0/0x530 | __primary_switched+0x8c/0xa0 | irq event stamp: 0 | hardirqs last enabled at (0): [<0000000000000000>] 0x0 | hardirqs last disabled at (0): [<0000000000000000>] 0x0 | softirqs last enabled at (0): [<0000000000000000>] 0x0 | softirqs last disabled at (0): [<0000000000000000>] 0x0 | ---[ end trace 0000000000000000 ]--- The warning has been produced since commit: 3e25d5a49f99b75b ("asm-generic: add an optional pfn_valid check to page_to_phys") ... which added a pfn_valid() check into page_to_phys(), and at this point in boot pfn_valid() will always return false because the vmemmap has not yet been initialized and there are no valid mem_sections yet. Before that commit, the arithmetic performed by page_to_phys() would give the expected physical address, though it is somewhat dubious to use vmemmap addresses before the vmemmap has been initialized. Aside from kernel image addresses, all executable code should be allocated from execmem (where all allocations will fall within the vmalloc area), and so there's no need for the fallback case when CONFIG_EXECMEM=n. Simplify patch_map() accordingly, directly converting kernel image addresses and removing the redundant fallback case. Fixes: 3e25d5a49f99 ("asm-generic: add an optional pfn_valid check to page_to_phys") Signed-off-by: Mark Rutland Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Mike Rapoport Cc: Thomas Huth Cc: Will Deacon Link: https://lore.kernel.org/r/20241202170359.1475019-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/patching.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 7f99723fbb8c4..1041bc67a3eee 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr) static void __kprobes *patch_map(void *addr, int fixmap) { - unsigned long uintaddr = (uintptr_t) addr; - bool image = is_image_text(uintaddr); - struct page *page; - - if (image) - page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_EXECMEM)) - page = vmalloc_to_page(addr); - else - return addr; - - BUG_ON(!page); - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + phys_addr_t phys; + + if (is_image_text((unsigned long)addr)) { + phys = __pa_symbol(addr); + } else { + struct page *page = vmalloc_to_page(addr); + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } + + return (void *)set_fixmap_offset(fixmap, phys); } static void __kprobes patch_unmap(int fixmap) From d44679fb954ffea961036ed1aeb7d65035f78489 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Dec 2024 14:57:29 +0000 Subject: [PATCH 218/366] drivers/virt: pkvm: Don't fail ioremap() call if MMIO_GUARD fails Calling the MMIO_GUARD hypercall from guests which have not been enrolled (e.g. because they are running without pvmfw) results in -EINVAL being returned. In this case, MMIO_GUARD is not active and so we can simply proceed with the normal ioremap() routine. Don't fail ioremap() if MMIO_GUARD fails; instead WARN_ON_ONCE() to highlight that the pvm environment is slightly wonky. Fixes: 0f1269495800 ("drivers/virt: pkvm: Intercept ioremap using pKVM MMIO_GUARD hypercall") Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20241202145731.6422-2-will@kernel.org Signed-off-by: Catalin Marinas --- drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c index 56a3859dda8a1..4230b817a80bd 100644 --- a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c +++ b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c @@ -87,12 +87,8 @@ static int mmio_guard_ioremap_hook(phys_addr_t phys, size_t size, while (phys < end) { const int func_id = ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_FUNC_ID; - int err; - - err = arm_smccc_do_one_page(func_id, phys); - if (err) - return err; + WARN_ON_ONCE(arm_smccc_do_one_page(func_id, phys)); phys += PAGE_SIZE; } From 92230596252ab6155f2d7f7ff9fa61425800a13f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Dec 2024 14:57:30 +0000 Subject: [PATCH 219/366] MAINTAINERS: Add CCA and pKVM CoCO guest support to the ARM64 entry Commits 7999edc484ca ("virt: arm-cca-guest: TSM_REPORT support for realm") and a06c3fad49a5 ("drivers/virt: pkvm: Add initial support for running as a protected guest") added arm64 guest-side support for running in CCA and pKVM confidential computing environments respectively. Unfortunately, these changes were not accompanied by a MAINTAINERS entry and so aren't automatically picked up by the get_maintainer.pl script. Since the initial support was merged via the arm64 tree, extend the ARM64 entry to cover the two new directories. Cc: Marc Zyngier Cc: Oliver Upton Cc: Suzuki K Poulose Signed-off-by: Will Deacon Acked-by: Suzuki K Poulose Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20241202145731.6422-3-will@kernel.org Signed-off-by: Catalin Marinas --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b13..9378c0c4bbc99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3376,6 +3376,8 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git F: Documentation/arch/arm64/ F: arch/arm64/ +F: drivers/virt/coco/arm-cca-guest/ +F: drivers/virt/coco/pkvm-guest/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ From 01fd68e54794fb1e1fe95be38facf9bbafee9ca3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Mon, 30 Sep 2024 20:36:22 +0200 Subject: [PATCH 220/366] ice: fix PHY Clock Recovery availability check To check if PHY Clock Recovery mechanic is available for a device, there is a need to verify if given PHY is available within the netlist, but the netlist node type used for the search is wrong, also the search context shall be specified. Modify the search function to allow specifying the context in the search. Use the PHY node type instead of CLOCK CONTROLLER type, also use proper search context which for PHY search is PORT, as defined in E810 Datasheet [1] ('3.3.8.2.4 Node Part Number and Node Options (0x0003)' and 'Table 3-105. Program Topology Device NVM Admin Command'). [1] https://cdrdv2.intel.com/v1/dl/getContent/613875?explicitVersion=true Fixes: 91e43ca0090b ("ice: fix linking when CONFIG_PTP_1588_CLOCK=n") Reviewed-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 25 ++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index b22e71dc59d4e..496d86cbd13fc 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -542,7 +542,8 @@ ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, /** * ice_find_netlist_node * @hw: pointer to the hw struct - * @node_type_ctx: type of netlist node to look for + * @node_type: type of netlist node to look for + * @ctx: context of the search * @node_part_number: node part number to look for * @node_handle: output parameter if node found - optional * @@ -552,10 +553,12 @@ ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, * valid if the function returns zero, and should be ignored on any non-zero * return value. * - * Returns: 0 if the node is found, -ENOENT if no handle was found, and - * a negative error code on failure to access the AQ. + * Return: + * * 0 if the node is found, + * * -ENOENT if no handle was found, + * * negative error code on failure to access the AQ. */ -static int ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, +static int ice_find_netlist_node(struct ice_hw *hw, u8 node_type, u8 ctx, u8 node_part_number, u16 *node_handle) { u8 idx; @@ -566,8 +569,8 @@ static int ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, int status; cmd.addr.topo_params.node_type_ctx = - FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_TYPE_M, - node_type_ctx); + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_TYPE_M, node_type) | + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, ctx); cmd.addr.topo_params.index = idx; status = ice_aq_get_netlist_node(hw, &cmd, @@ -2747,9 +2750,11 @@ bool ice_is_pf_c827(struct ice_hw *hw) */ bool ice_is_phy_rclk_in_netlist(struct ice_hw *hw) { - if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_PHY, + ICE_AQC_LINK_TOPO_NODE_CTX_PORT, ICE_AQC_GET_LINK_TOPO_NODE_NR_C827, NULL) && - ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_PHY, + ICE_AQC_LINK_TOPO_NODE_CTX_PORT, ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY, NULL)) return false; @@ -2765,6 +2770,7 @@ bool ice_is_phy_rclk_in_netlist(struct ice_hw *hw) bool ice_is_clock_mux_in_netlist(struct ice_hw *hw) { if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX, + ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL, ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX, NULL)) return false; @@ -2785,12 +2791,14 @@ bool ice_is_clock_mux_in_netlist(struct ice_hw *hw) bool ice_is_cgu_in_netlist(struct ice_hw *hw) { if (!ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL, ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032, NULL)) { hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032; return true; } else if (!ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL, ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384, NULL)) { hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384; @@ -2809,6 +2817,7 @@ bool ice_is_cgu_in_netlist(struct ice_hw *hw) bool ice_is_gps_in_netlist(struct ice_hw *hw) { if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_GPS, + ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL, ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS, NULL)) return false; From 3214fae85e8336fe13e20cf78fc9b6a668bdedff Mon Sep 17 00:00:00 2001 From: Przemyslaw Korba Date: Fri, 15 Nov 2024 13:25:37 +0100 Subject: [PATCH 221/366] ice: fix PHY timestamp extraction for ETH56G Fix incorrect PHY timestamp extraction for ETH56G. It's better to use FIELD_PREP() than manual shift. Fixes: 7cab44f1c35f ("ice: Introduce ETH56G PHY model for E825C products") Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Przemyslaw Korba Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 3 ++- drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index dfd49732bd5b5..518893f23372b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -1518,7 +1518,8 @@ static int ice_read_ptp_tstamp_eth56g(struct ice_hw *hw, u8 port, u8 idx, * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = ((u64)hi) << TS_PHY_HIGH_S | ((u64)lo & TS_PHY_LOW_M); + *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | + FIELD_PREP(TS_PHY_LOW_M, lo); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 47af7c5c79b81..1cee0f1bba2df 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -682,9 +682,8 @@ static inline bool ice_is_dual(struct ice_hw *hw) #define TS_HIGH_M 0xFF #define TS_HIGH_S 32 -#define TS_PHY_LOW_M 0xFF -#define TS_PHY_HIGH_M 0xFFFFFFFF -#define TS_PHY_HIGH_S 8 +#define TS_PHY_LOW_M GENMASK(7, 0) +#define TS_PHY_HIGH_M GENMASK_ULL(39, 8) #define BYTES_PER_IDX_ADDR_L_U 8 #define BYTES_PER_IDX_ADDR_L 4 From 9ee87d2b21990f0bc590da9dff9258e9a8895cb3 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Tue, 29 Oct 2024 10:42:59 +0100 Subject: [PATCH 222/366] ice: Fix NULL pointer dereference in switchdev Commit 608a5c05c39b ("virtchnl: support queue rate limit and quanta size configuration") introduced new virtchnl ops: - get_qos_caps - cfg_q_bw - cfg_q_quanta New ops were added to ice_virtchnl_dflt_ops, in commit 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration"), but not to the ice_virtchnl_repr_ops. Because of that, if we get one of those messages in switchdev mode we end up with NULL pointer dereference: [ 1199.794701] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 1199.794804] Workqueue: ice ice_service_task [ice] [ 1199.794878] RIP: 0010:0x0 [ 1199.795027] Call Trace: [ 1199.795033] [ 1199.795039] ? __die+0x20/0x70 [ 1199.795051] ? page_fault_oops+0x140/0x520 [ 1199.795064] ? exc_page_fault+0x7e/0x270 [ 1199.795074] ? asm_exc_page_fault+0x22/0x30 [ 1199.795086] ice_vc_process_vf_msg+0x6e5/0xd30 [ice] [ 1199.795165] __ice_clean_ctrlq+0x734/0x9d0 [ice] [ 1199.795207] ice_service_task+0xccf/0x12b0 [ice] [ 1199.795248] process_one_work+0x21a/0x620 [ 1199.795260] worker_thread+0x18d/0x330 [ 1199.795269] ? __pfx_worker_thread+0x10/0x10 [ 1199.795279] kthread+0xec/0x120 [ 1199.795288] ? __pfx_kthread+0x10/0x10 [ 1199.795296] ret_from_fork+0x2d/0x50 [ 1199.795305] ? __pfx_kthread+0x10/0x10 [ 1199.795312] ret_from_fork_asm+0x1a/0x30 [ 1199.795323] Fixes: 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration") Reviewed-by: Przemek Kitszel Reviewed-by: Michal Swiatkowski Signed-off-by: Wojciech Drewek Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index f445e33b2028f..ff4ad788d96ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4128,6 +4128,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .get_qos_caps = ice_vc_get_qos_caps, .cfg_q_bw = ice_vc_cfg_q_bw, .cfg_q_quanta = ice_vc_cfg_q_quanta, + /* If you add a new op here please make sure to add it to + * ice_virtchnl_repr_ops as well. + */ }; /** @@ -4258,6 +4261,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, + .get_qos_caps = ice_vc_get_qos_caps, + .cfg_q_bw = ice_vc_cfg_q_bw, + .cfg_q_quanta = ice_vc_cfg_q_quanta, }; /** From 761e0be2888a931465e0d7bbeecce797f9c311a3 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Mon, 4 Nov 2024 19:49:09 +0100 Subject: [PATCH 223/366] ice: Fix VLAN pruning in switchdev mode In switchdev mode the uplink VSI should receive all unmatched packets, including VLANs. Therefore, VLAN pruning should be disabled if uplink is in switchdev mode. It is already being done in ice_eswitch_setup_env(), however the addition of ice_up() in commit 44ba608db509 ("ice: do switchdev slow-path Rx using PF VSI") caused VLAN pruning to be re-enabled after disabling it. Add a check to ice_set_vlan_filtering_features() to ensure VLAN filtering will not be enabled if uplink is in switchdev mode. Note that ice_is_eswitch_mode_switchdev() is being used instead of ice_is_switchdev_running(), as the latter would only return true after the whole switchdev setup completes. Fixes: 44ba608db509 ("ice: do switchdev slow-path Rx using PF VSI") Reviewed-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Tested-by: Priya Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1eaa4428fd248..187856d4c79a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6408,10 +6408,12 @@ ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features) int err = 0; /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking - * if either bit is set + * if either bit is set. In switchdev mode Rx filtering should never be + * enabled. */ - if (features & - (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) + if ((features & + (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) && + !ice_is_eswitch_mode_switchdev(vsi->back)) err = vlan_ops->ena_rx_filtering(vsi); else err = vlan_ops->dis_rx_filtering(vsi); From 4c69c77aafe74cf755af55070584b643e5c4e4d8 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Mon, 7 Oct 2024 13:24:35 -0700 Subject: [PATCH 224/366] idpf: set completion tag for "empty" bufs associated with a packet Commit d9028db618a6 ("idpf: convert to libeth Tx buffer completion") inadvertently removed code that was necessary for the tx buffer cleaning routine to iterate over all buffers associated with a packet. When a frag is too large for a single data descriptor, it will be split across multiple data descriptors. This means the frag will span multiple buffers in the buffer ring in order to keep the descriptor and buffer ring indexes aligned. The buffer entries in the ring are technically empty and no cleaning actions need to be performed. These empty buffers can precede other frags associated with the same packet. I.e. a single packet on the buffer ring can look like: buf[0]=skb0.frag0 buf[1]=skb0.frag1 buf[2]=empty buf[3]=skb0.frag2 The cleaning routine iterates through these buffers based on a matching completion tag. If the completion tag is not set for buf2, the loop will end prematurely. Frag2 will be left uncleaned and next_to_clean will be left pointing to the end of packet, which will break the cleaning logic for subsequent cleans. This consequently leads to tx timeouts. Assign the empty bufs the same completion tag for the packet to ensure the cleaning routine iterates over all of the buffers associated with the packet. Fixes: d9028db618a6 ("idpf: convert to libeth Tx buffer completion") Signed-off-by: Joshua Hay Acked-by: Alexander Lobakin Reviewed-by: Madhu chittim Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index da2a5becf62f1..34f4118c7bc0d 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2448,6 +2448,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, * rest of the packet. */ tx_buf->type = LIBETH_SQE_EMPTY; + idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; /* Adjust the DMA offset and the remaining size of the * fragment. On the first iteration of this loop, From d0725312adf5a803de8f621bd1b12ba7a6464a29 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 1 Nov 2024 16:05:42 -0700 Subject: [PATCH 225/366] ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5 Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") added support for v1.5 of the PF to VF mailbox communication API. This commit mistakenly enabled IPSEC offload for API v1.5. No implementation of the v1.5 API has support for IPSEC offload. This offload is only supported by the Linux PF as mailbox API v1.4. In fact, the v1.5 API is not implemented in any Linux PF. Attempting to enable IPSEC offload on a PF which supports v1.5 API will not work. Only the Linux upstream ixgbe and ixgbevf support IPSEC offload, and only as part of the v1.4 API. Fix the ixgbevf Linux driver to stop attempting IPSEC offload when the mailbox API does not support it. The existing API design choice makes it difficult to support future API versions, as other non-Linux hosts do not implement IPSEC offload. If we add support for v1.5 to the Linux PF, then we lose support for IPSEC offload. A full solution likely requires a new mailbox API with a proper negotiation to check that IPSEC is actually supported by the host. Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 66cf17f194082..f804b35d79c72 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -629,7 +629,6 @@ void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter) switch (adapter->hw.api_version) { case ixgbe_mbox_api_14: - case ixgbe_mbox_api_15: break; default: return; From 15915b43a7fb938934bb7fc4290127218859d795 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 1 Nov 2024 16:05:43 -0700 Subject: [PATCH 226/366] ixgbe: downgrade logging of unsupported VF API version to debug The ixgbe PF driver logs an info message when a VF attempts to negotiate an API version which it does not support: VF 0 requested invalid api version 6 The ixgbevf driver attempts to load with mailbox API v1.5, which is required for best compatibility with other hosts such as the ESX VMWare PF. The Linux PF only supports API v1.4, and does not currently have support for the v1.5 API. The logged message can confuse users, as the v1.5 API is valid, but just happens to not currently be supported by the Linux PF. Downgrade the info message to a debug message, and fix the language to use 'unsupported' instead of 'invalid' to improve message clarity. Long term, we should investigate whether the improvements in the v1.5 API make sense for the Linux PF, and if so implement them properly. This may require yet another API version to resolve issues with negotiating IPSEC offload support. Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") Reported-by: Yifei Liu Link: https://lore.kernel.org/intel-wired-lan/20240301235837.3741422-1-yifei.l.liu@oracle.com/ Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 2 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 6493abf189de5..6639069ad5283 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -194,6 +194,8 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg); dev_err(&adapter->pdev->dev, format, ## arg) #define e_dev_notice(format, arg...) \ dev_notice(&adapter->pdev->dev, format, ## arg) +#define e_dbg(msglvl, format, arg...) \ + netif_dbg(adapter, msglvl, adapter->netdev, format, ## arg) #define e_info(msglvl, format, arg...) \ netif_info(adapter, msglvl, adapter->netdev, format, ## arg) #define e_err(msglvl, format, arg...) \ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 9631559a5aeac..ccdce80edd142 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1048,7 +1048,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, break; } - e_info(drv, "VF %d requested invalid api version %u\n", vf, api); + e_dbg(drv, "VF %d requested unsupported api version %u\n", vf, api); return -1; } From f72ce14b231f7bf06088e4e50f1875f1e35f79d7 Mon Sep 17 00:00:00 2001 From: Tore Amundsen Date: Fri, 15 Nov 2024 14:17:36 +0000 Subject: [PATCH 227/366] ixgbe: Correct BASE-BX10 compliance code SFF-8472 (section 5.4 Transceiver Compliance Codes) defines bit 6 as BASE-BX10. Bit 6 means a value of 0x40 (decimal 64). The current value in the source code is 0x64, which appears to be a mix-up of hex and decimal values. A value of 0x64 (binary 01100100) incorrectly sets bit 2 (1000BASE-CX) and bit 5 (100BASE-FX) as well. Fixes: 1b43e0d20f2d ("ixgbe: Add 1000BASE-BX support") Signed-off-by: Tore Amundsen Reviewed-by: Paul Menzel Acked-by: Ernesto Castellotti Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 14aa2ca51f70e..81179c60af4e0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -40,7 +40,7 @@ #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_1GBASET_CAPABLE 0x8 -#define IXGBE_SFF_BASEBX10_CAPABLE 0x64 +#define IXGBE_SFF_BASEBX10_CAPABLE 0x40 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 #define IXGBE_SFF_10GBASELR_CAPABLE 0x20 #define IXGBE_SFF_SOFT_RS_SELECT_MASK 0x8 From 0566f83d206c7a864abcd741fe39d6e0ae5eef29 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 23 Oct 2024 20:10:48 +0800 Subject: [PATCH 228/366] igb: Fix potential invalid memory access in igb_init_module() The pci_register_driver() can fail and when this happened, the dca_notifier needs to be unregistered, otherwise the dca_notifier can be called when igb fails to install, resulting to invalid memory access. Fixes: bbd98fe48a43 ("igb: Fix DCA errors and do not use context index for 82576") Signed-off-by: Yuan Can Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 08578980b6518..288a4bb2683a9 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -637,6 +637,10 @@ static int __init igb_init_module(void) dca_register_notify(&dca_notifier); #endif ret = pci_register_driver(&igb_driver); +#ifdef CONFIG_IGB_DCA + if (ret) + dca_unregister_notify(&dca_notifier); +#endif return ret; } From b0de5456e201c475d6a860ceeb3ed8ee2923695a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 2 Dec 2024 09:45:48 -0800 Subject: [PATCH 229/366] nvme-pci: remove two deallocate zeroes quirks The quirk was initially used as a signal to set the discard_zeroes_data queue limit because there were some use cases that relied on that behavior. The queue limit no longer exists as every user of it has been converted to use the write zeroes operation instead. The quirk now means to use a discard command as an alias to a write zeroes request. Two of the devices previously using the quirk support the write zeroes command directly, so these don't need or want to use discard when the desired operation is to write zeroes. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4c644bb7f0692..9535e35ef18a5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3588,12 +3588,10 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES | NVME_QUIRK_IGNORE_DEV_SUBNQN | NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ - .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES, }, + .driver_data = NVME_QUIRK_STRIPE_SIZE, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_MEDIUM_PRIO_SQ | From 7b1d83da254be3bf054965c8f3b1ad976f460ae5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 27 Nov 2024 12:46:54 +0100 Subject: [PATCH 230/366] netfilter: nft_inner: incorrect percpu area handling under softirq Softirq can interrupt ongoing packet from process context that is walking over the percpu area that contains inner header offsets. Disable bh and perform three checks before restoring the percpu inner header offsets to validate that the percpu area is valid for this skbuff: 1) If the NFT_PKTINFO_INNER_FULL flag is set on, then this skbuff has already been parsed before for inner header fetching to register. 2) Validate that the percpu area refers to this skbuff using the skbuff pointer as a cookie. If there is a cookie mismatch, then this skbuff needs to be parsed again. 3) Finally, validate if the percpu area refers to this tunnel type. Only after these three checks the percpu area is restored to a on-stack copy and bh is enabled again. After inner header fetching, the on-stack copy is stored back to the percpu area. Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Reported-by: syzbot+84d0441b9860f0d63285@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 1 + net/netfilter/nft_inner.c | 57 ++++++++++++++++++++------ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index ff27cb2e16620..03b6165756fc5 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -161,6 +161,7 @@ enum { }; struct nft_inner_tun_ctx { + unsigned long cookie; u16 type; u16 inner_tunoff; u16 inner_lloff; diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 928312d01eb1d..817ab978d24a1 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -210,35 +210,66 @@ static int nft_inner_parse(const struct nft_inner *priv, struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { - struct nft_inner_tun_ctx ctx = {}; u32 off = pkt->inneroff; if (priv->flags & NFT_INNER_HDRSIZE && - nft_inner_parse_tunhdr(priv, pkt, &ctx, &off) < 0) + nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0) return -1; if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { - if (nft_inner_parse_l2l3(priv, pkt, &ctx, off) < 0) + if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0) return -1; } else if (priv->flags & NFT_INNER_TH) { - ctx.inner_thoff = off; - ctx.flags |= NFT_PAYLOAD_CTX_INNER_TH; + tun_ctx->inner_thoff = off; + tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; } - *tun_ctx = ctx; tun_ctx->type = priv->type; + tun_ctx->cookie = (unsigned long)pkt->skb; pkt->flags |= NFT_PKTINFO_INNER_FULL; return 0; } +static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx) +{ + struct nft_inner_tun_ctx *this_cpu_tun_ctx; + + local_bh_disable(); + this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); + if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { + local_bh_enable(); + return false; + } + *tun_ctx = *this_cpu_tun_ctx; + local_bh_enable(); + + return true; +} + +static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt, + const struct nft_inner_tun_ctx *tun_ctx) +{ + struct nft_inner_tun_ctx *this_cpu_tun_ctx; + + local_bh_disable(); + this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); + if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) + *this_cpu_tun_ctx = *tun_ctx; + local_bh_enable(); +} + static bool nft_inner_parse_needed(const struct nft_inner *priv, const struct nft_pktinfo *pkt, - const struct nft_inner_tun_ctx *tun_ctx) + struct nft_inner_tun_ctx *tun_ctx) { if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) return true; + if (!nft_inner_restore_tun_ctx(pkt, tun_ctx)) + return true; + if (priv->type != tun_ctx->type) return true; @@ -248,27 +279,29 @@ static bool nft_inner_parse_needed(const struct nft_inner *priv, static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_inner_tun_ctx *tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); const struct nft_inner *priv = nft_expr_priv(expr); + struct nft_inner_tun_ctx tun_ctx = {}; if (nft_payload_inner_offset(pkt) < 0) goto err; - if (nft_inner_parse_needed(priv, pkt, tun_ctx) && - nft_inner_parse(priv, (struct nft_pktinfo *)pkt, tun_ctx) < 0) + if (nft_inner_parse_needed(priv, pkt, &tun_ctx) && + nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0) goto err; switch (priv->expr_type) { case NFT_INNER_EXPR_PAYLOAD: - nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx); + nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; case NFT_INNER_EXPR_META: - nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx); + nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; default: WARN_ON_ONCE(1); goto err; } + nft_inner_save_tun_ctx(pkt, &tun_ctx); + return; err: regs->verdict.code = NFT_BREAK; From 5a6ea7022ff4d2a65ae328619c586d6a8909b48b Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 3 Dec 2024 10:22:22 -0800 Subject: [PATCH 231/366] samples/bpf: Remove unnecessary -I flags from libbpf EXTRA_CFLAGS Commit [0] breaks samples/bpf build: $ make M=samples/bpf ... make -C /path/to/kernel/samples/bpf/../../tools/lib/bpf \ ... EXTRA_CFLAGS=" \ ... -fsanitize=bounds \ -I/path/to/kernel/usr/include \ ... /path/to/kernel/samples/bpf/libbpf/libbpf.a install_headers CC /path/to/kernel/samples/bpf/libbpf/staticobjs/libbpf.o In file included from libbpf.c:29: /path/to/kernel/tools/include/linux/err.h:35:8: error: 'inline' can only appear on functions 35 | static inline void * __must_check ERR_PTR(long error_) | ^ The error is caused by `objtree` variable changing definition from `.` (dot) to an absolute path: - The variable TPROGS_CFLAGS is constructed as follows: ... TPROGS_CFLAGS += -I$(objtree)/usr/include - It is passed as EXTRA_CFLAGS for libbpf compilation: $(LIBBPF): ... ... $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" - Before commit [0], the line passed to libbpf makefile was '-I./usr/include', where '.' referred to LIBBPF_SRC due to -C flag. The directory $(LIBBPF_SRC)/usr/include does not exist and thus was never resolved by C compiler. - After commit [0], the line passed to libbpf makefile became: '/usr/include', this directory exists and is resolved by C compiler. - Both 'tools/include' and 'usr/include' define files err.h and types.h. - libbpf expects headers like 'linux/err.h' and 'linux/types.h' defined in 'tools/include', not 'usr/include', hence the compilation error. This commit removes unnecessary -I flags from libbpf compilation. (libbpf sets up the necessary includes at lib/bpf/Makefile:63). Changes v1 [1] -> v2: - dropped unnecessary replacement of KBUILD_OUTPUT with $(objtree) (Andrii) Changes v2 [2] -> v3: - make sure --sysroot option is set for libbpf's EXTRA_CFLAGS, if $(SYSROOT) is set (Stanislav) [0] commit 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") [1] https://lore.kernel.org/bpf/20241202212154.3174402-1-eddyz87@gmail.com/ [2] https://lore.kernel.org/bpf/20241202234741.3492084-1-eddyz87@gmail.com/ Fixes: 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20241203182222.3915763-1-eddyz87@gmail.com --- samples/bpf/Makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index bcf103a4c14f9..96a05e70ace30 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -146,13 +146,14 @@ ifeq ($(ARCH), x86) BPF_EXTRA_CFLAGS += -fcf-protection endif -TPROGS_CFLAGS += -Wall -O2 -TPROGS_CFLAGS += -Wmissing-prototypes -TPROGS_CFLAGS += -Wstrict-prototypes -TPROGS_CFLAGS += $(call try-run,\ +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes +COMMON_CFLAGS += $(call try-run,\ printf "int main() { return 0; }" |\ $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) +TPROGS_CFLAGS += $(COMMON_CFLAGS) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) @@ -162,7 +163,7 @@ TPROGS_CFLAGS += -I$(srctree)/tools/lib TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 ifdef SYSROOT -TPROGS_CFLAGS += --sysroot=$(SYSROOT) +COMMON_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif @@ -229,7 +230,7 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers From b2e382ae12a63560fca35050498e19e760adf8c0 Mon Sep 17 00:00:00 2001 From: Liequan Che Date: Mon, 2 Dec 2024 19:56:38 +0800 Subject: [PATCH 232/366] bcache: revert replacing IS_ERR_OR_NULL with IS_ERR again Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") leads a NULL pointer deference in cache_set_flush(). 1721 if (!IS_ERR_OR_NULL(c->root)) 1722 list_add(&c->root->list, &c->btree_cache); >From the above code in cache_set_flush(), if previous registration code fails before allocating c->root, it is possible c->root is NULL as what it is initialized. __bch_btree_node_alloc() never returns NULL but c->root is possible to be NULL at above line 1721. This patch replaces IS_ERR() by IS_ERR_OR_NULL() to fix this. Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") Signed-off-by: Liequan Che Cc: stable@vger.kernel.org Cc: Zheng Wang Reviewed-by: Mingzhe Zou Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20241202115638.28957-1-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e7abfdd77c3b6..e42f1400cea9d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1718,7 +1718,7 @@ static CLOSURE_CALLBACK(cache_set_flush) if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); - if (!IS_ERR(c->root)) + if (!IS_ERR_OR_NULL(c->root)) list_add(&c->root->list, &c->btree_cache); /* From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Nov 2024 13:59:09 -0500 Subject: [PATCH 233/366] drm/amdgpu: rework resume handling for display (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split resume into a 3rd step to handle displays when DCC is enabled on DCN 4.0.1. Move display after the buffer funcs have been re-enabled so that the GPU will do the move and properly set the DCC metadata for DCN. v2: fix fence irq resume ordering Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 913eb4f08ee6f..96316111300a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3762,7 +3762,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * First resume function for hardware IPs. The list of all the hardware + * Second resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * all blocks except COMMON, GMC, and IH. resume puts the hardware into a * functional state after a suspend and updates the software state as @@ -3780,6 +3780,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); @@ -3790,6 +3791,36 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Third resume function for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the resume callbacks are run for + * all DCE. resume puts the hardware into a functional state after a suspend + * and updates the software state as necessary. This function is also used + * for restoring the GPU after a GPU reset. + * + * Returns 0 on success, negative error code on failure. + */ +static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { + r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); + if (r) + return r; + } + } + + return 0; +} + /** * amdgpu_device_ip_resume - run resume for hardware IPs * @@ -3819,6 +3850,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(adev, true); + if (r) + return r; + + amdgpu_fence_driver_hw_init(adev); + + r = amdgpu_device_ip_resume_phase3(adev); + return r; } @@ -4899,7 +4937,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); goto exit; } - amdgpu_fence_driver_hw_init(adev); if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); @@ -5484,6 +5521,10 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) if (tmp_adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + r = amdgpu_device_ip_resume_phase3(tmp_adev); + if (r) + goto out; + if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); From cecc1555a8c2acd65f9d36182c28ae463db0ad7e Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Mon, 2 Dec 2024 18:21:02 +0000 Subject: [PATCH 234/366] net: Make napi_hash_lock irq safe Make napi_hash_lock IRQ safe. It is used during the control path, and is taken and released in napi_hash_add and napi_hash_del, which will typically be called by calls to napi_enable and napi_disable. This change avoids a deadlock in pcnet32 (and other any other drivers which follow the same pattern): CPU 0: pcnet32_open spin_lock_irqsave(&lp->lock, ...) napi_enable napi_hash_add <- before this executes, CPU 1 proceeds spin_lock(napi_hash_lock) [...] spin_unlock_irqrestore(&lp->lock, flags); CPU 1: pcnet32_close napi_disable napi_hash_del spin_lock(napi_hash_lock) < INTERRUPT > pcnet32_interrupt spin_lock(lp->lock) <- DEADLOCK Changing the napi_hash_lock to be IRQ safe prevents the IRQ from firing on CPU 1 until napi_hash_lock is released, preventing the deadlock. Cc: stable@vger.kernel.org Fixes: 86e25f40aa1e ("net: napi: Add napi_config") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/netdev/85dd4590-ea6b-427d-876a-1d8559c7ad82@roeck-us.net/ Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Tested-by: Guenter Roeck Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241202182103.363038-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 13d00fc10f559..45a8c3dd4a648 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6557,18 +6557,22 @@ static void __napi_hash_add_with_id(struct napi_struct *napi, static void napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { - spin_lock(&napi_hash_lock); + unsigned long flags; + + spin_lock_irqsave(&napi_hash_lock, flags); WARN_ON_ONCE(napi_by_id(napi_id)); __napi_hash_add_with_id(napi, napi_id); - spin_unlock(&napi_hash_lock); + spin_unlock_irqrestore(&napi_hash_lock, flags); } static void napi_hash_add(struct napi_struct *napi) { + unsigned long flags; + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) return; - spin_lock(&napi_hash_lock); + spin_lock_irqsave(&napi_hash_lock, flags); /* 0..NR_CPUS range is reserved for sender_cpu use */ do { @@ -6578,7 +6582,7 @@ static void napi_hash_add(struct napi_struct *napi) __napi_hash_add_with_id(napi, napi_gen_id); - spin_unlock(&napi_hash_lock); + spin_unlock_irqrestore(&napi_hash_lock, flags); } /* Warning : caller is responsible to make sure rcu grace period @@ -6586,11 +6590,13 @@ static void napi_hash_add(struct napi_struct *napi) */ static void napi_hash_del(struct napi_struct *napi) { - spin_lock(&napi_hash_lock); + unsigned long flags; + + spin_lock_irqsave(&napi_hash_lock, flags); hlist_del_init_rcu(&napi->napi_hash_node); - spin_unlock(&napi_hash_lock); + spin_unlock_irqrestore(&napi_hash_lock, flags); } static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) From 3d501f562f63b290351169e3e9931ffe3d57b2ae Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 2 Dec 2024 15:56:08 +0000 Subject: [PATCH 235/366] Revert "udp: avoid calling sock_def_readable() if possible" This reverts commit 612b1c0dec5bc7367f90fc508448b8d0d7c05414. On a scenario with multiple threads blocking on a recvfrom(), we need to call sock_def_readable() on every __udp_enqueue_schedule_skb() otherwise the threads won't be woken up as __skb_wait_for_more_packets() is using prepare_to_wait_exclusive(). Link: https://bugzilla.redhat.com/2308477 Fixes: 612b1c0dec5b ("udp: avoid calling sock_def_readable() if possible") Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241202155620.1719-1-ffmancera@riseup.net Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6a01905d379fd..e8953e88efef9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1674,7 +1674,6 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) struct sk_buff_head *list = &sk->sk_receive_queue; int rmem, err = -ENOMEM; spinlock_t *busy = NULL; - bool becomes_readable; int size, rcvbuf; /* Immediately drop when the receive queue is full. @@ -1715,19 +1714,12 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) */ sock_skb_set_dropcount(sk, skb); - becomes_readable = skb_queue_empty(list); __skb_queue_tail(list, skb); spin_unlock(&list->lock); - if (!sock_flag(sk, SOCK_DEAD)) { - if (becomes_readable || - sk->sk_data_ready != sock_def_readable || - READ_ONCE(sk->sk_peek_off) >= 0) - INDIRECT_CALL_1(sk->sk_data_ready, - sock_def_readable, sk); - else - sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); - } + if (!sock_flag(sk, SOCK_DEAD)) + INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk); + busylock_release(busy); return 0; From 94071909477677fc2a1abf3fb281f203f66cf3ca Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 2 Dec 2024 18:48:05 +0200 Subject: [PATCH 236/366] ethtool: Fix access to uninitialized fields in set RXNFC command The check for non-zero ring with RSS is only relevant for ETHTOOL_SRXCLSRLINS command, in other cases the check tries to access memory which was not initialized by the userspace tool. Only perform the check in case of ETHTOOL_SRXCLSRLINS. Without this patch, filter deletion (for example) could statistically result in a false error: # ethtool --config-ntuple eth3 delete 484 rmgr: Cannot delete RX class rule: Invalid argument Cannot delete classification rule Fixes: 9e43ad7a1ede ("net: ethtool: only allow set_rxnfc with rss + ring_cookie if driver opts in") Link: https://lore.kernel.org/netdev/871a9ecf-1e14-40dd-bbd7-e90c92f89d47@nvidia.com/ Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Reviewed-by: Edward Cree Link: https://patch.msgid.link/20241202164805.1637093-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 61df8ce44379d..7bb94875a7ec8 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (info.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && + if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; From 292207809486d99c78068d3f459cbbbffde88415 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 2 Dec 2024 10:21:38 -0500 Subject: [PATCH 237/366] net: sched: fix erspan_opt settings in cls_flower When matching erspan_opt in cls_flower, only the (version, dir, hwid) fields are relevant. However, in fl_set_erspan_opt() it initializes all bits of erspan_opt and its mask to 1. This inadvertently requires packets to match not only the (version, dir, hwid) fields but also the other fields that are unexpectedly set to 1. This patch resolves the issue by ensuring that only the (version, dir, hwid) fields are configured in fl_set_erspan_opt(), leaving the other fields to 0 in erspan_opt. Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options") Reported-by: Shuang Li Signed-off-by: Xin Long Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e280c27cb9f9a..1008ec8a464c9 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1369,7 +1369,6 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, int err; md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len]; - memset(md, 0xff, sizeof(*md)); md->version = 1; if (!depth) @@ -1398,9 +1397,9 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index"); return -EINVAL; } + memset(&md->u.index, 0xff, sizeof(md->u.index)); if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; - memset(&md->u, 0x00, sizeof(md->u)); md->u.index = nla_get_be32(nla); } } else if (md->version == 2) { @@ -1409,10 +1408,12 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid"); return -EINVAL; } + md->u.md2.dir = 1; if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]; md->u.md2.dir = nla_get_u8(nla); } + set_hwid(&md->u.md2, 0xff); if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]; set_hwid(&md->u.md2, nla_get_u8(nla)); From 5eb7de8cd58e73851cd37ff8d0666517d9926948 Mon Sep 17 00:00:00 2001 From: Lion Ackermann Date: Mon, 2 Dec 2024 17:22:57 +0100 Subject: [PATCH 238/366] net: sched: fix ordering of qlen adjustment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes to sch->q.qlen around qdisc_tree_reduce_backlog() need to happen _before_ a call to said function because otherwise it may fail to notify parent qdiscs when the child is about to become empty. Signed-off-by: Lion Ackermann Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 2 +- net/sched/sch_choke.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index f2f9b75008bb0..8d8b2db4653c0 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1525,7 +1525,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) b->backlogs[idx] -= len; b->tin_backlog -= len; sch->qstats.backlog -= len; - qdisc_tree_reduce_backlog(sch, 1, len); flow->dropped++; b->tin_dropped++; @@ -1536,6 +1535,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) __qdisc_drop(skb, to_free); sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, len); cake_heapify(q, 0); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 1e940ad0d2fad..59e7bdf5063e8 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -123,10 +123,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx, if (idx == q->tail) choke_zap_tail_holes(q); + --sch->q.qlen; qdisc_qstats_backlog_dec(sch, skb); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_drop(skb, sch, to_free); - --sch->q.qlen; } struct choke_skb_cb { From ceb259e43bf572ba7d766e1679ba73861d16203a Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Wed, 4 Dec 2024 10:02:08 +0200 Subject: [PATCH 239/366] spi: intel: Add Panther Lake SPI controller support The Panther Lake SPI controllers are compatible with the Cannon Lake controllers. Add support for following SPI controller device IDs: - H-series: 0xe323 - P-series: 0xe423 - U-series: 0xe423 Signed-off-by: Aapo Vienamo Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20241204080208.1036537-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index c3b54928143d2..4d9ffec900bbf 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -86,6 +86,8 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0xe323), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0xe423), (unsigned long)&cnl_info }, { }, }; MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids); From 456f010bfaefde84d3390c755eedb1b0a5857c3c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Nov 2024 16:30:38 +0100 Subject: [PATCH 240/366] netfilter: ipset: Hold module reference while requesting a module User space may unload ip_set.ko while it is itself requesting a set type backend module, leading to a kernel crash. The race condition may be provoked by inserting an mdelay() right after the nfnl_unlock() call. Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Phil Sutter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 61431690cbd5f..cc20e6d56807c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -104,14 +104,19 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { + if (!try_module_get(THIS_MODULE)) + return false; + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warn("Can't find ip_set type %s\n", name); nfnl_lock(NFNL_SUBSYS_IPSET); + module_put(THIS_MODULE); return false; } nfnl_lock(NFNL_SUBSYS_IPSET); + module_put(THIS_MODULE); return true; } From e63fbd5f6810ed756bbb8a1549c7d4132968baa9 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 4 Dec 2024 04:22:28 +0800 Subject: [PATCH 241/366] tracing: Fix cmp_entries_dup() to respect sort() comparison rules The cmp_entries_dup() function used as the comparator for sort() violated the symmetry and transitivity properties required by the sorting algorithm. Specifically, it returned 1 whenever memcmp() was non-zero, which broke the following expectations: * Symmetry: If x < y, then y > x. * Transitivity: If x < y and y < z, then x < z. These violations could lead to incorrect sorting and failure to correctly identify duplicate elements. Fix the issue by directly returning the result of memcmp(), which adheres to the required comparison properties. Cc: stable@vger.kernel.org Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map") Link: https://lore.kernel.org/20241203202228.1274403-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/tracing_map.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 3a56e7c8aa4f6..1921ade45be38 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -845,15 +845,11 @@ int tracing_map_init(struct tracing_map *map) static int cmp_entries_dup(const void *A, const void *B) { const struct tracing_map_sort_entry *a, *b; - int ret = 0; a = *(const struct tracing_map_sort_entry **)A; b = *(const struct tracing_map_sort_entry **)B; - if (memcmp(a->key, b->key, a->elt->map->key_size)) - ret = 1; - - return ret; + return memcmp(a->key, b->key, a->elt->map->key_size); } static int cmp_entries_sum(const void *A, const void *B) From 3f5eb062e8aa335643181c480e6c590c6cedfd22 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Sun, 10 Nov 2024 23:03:40 +0530 Subject: [PATCH 242/366] scsi: mpt3sas: Diag-Reset when Doorbell-In-Use bit is set during driver load time Issue a Diag-Reset when the "Doorbell-In-Use" bit is set during the driver load/initialization. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110173341.11595-2-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index ed5046593fdab..16ac2267c71e1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7041,11 +7041,12 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, int i; u8 failed; __le32 *mfp; + int ret_val; /* make sure doorbell is not in use */ if ((ioc->base_readl_ext_retry(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) { ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__); - return -EFAULT; + goto doorbell_diag_reset; } /* clear pending doorbell interrupts from previous state changes */ @@ -7135,6 +7136,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, le32_to_cpu(mfp[i])); } return 0; + +doorbell_diag_reset: + ret_val = _base_diag_reset(ioc); + return ret_val; } /** From 6050471545eecec214ca9ceb38a63e98e9003338 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Sun, 10 Nov 2024 23:03:41 +0530 Subject: [PATCH 243/366] scsi: mpt3sas: Update driver version to 51.100.00.00 Update driver version to 51.100.00.00. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110173341.11595-3-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index eceb5eeb46513..d8d1a64b4764d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -77,11 +77,11 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "48.100.00.00" -#define MPT3SAS_MAJOR_VERSION 48 +#define MPT3SAS_DRIVER_VERSION "51.100.00.00" +#define MPT3SAS_MAJOR_VERSION 51 #define MPT3SAS_MINOR_VERSION 100 -#define MPT3SAS_BUILD_VERSION 0 -#define MPT3SAS_RELEASE_VERSION 00 +#define MPT3SAS_BUILD_VERSION 00 +#define MPT3SAS_RELEASE_VERSION 00 #define MPT2SAS_DRIVER_NAME "mpt2sas" #define MPT2SAS_DESCRIPTION "LSI MPT Fusion SAS 2.0 Device Driver" From 69772f509e084ec6bca12dbcdeeeff41b0103774 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 20:47:53 -0800 Subject: [PATCH 244/366] bpf: Don't mark STACK_INVALID as STACK_MISC in mark_stack_slot_misc Inside mark_stack_slot_misc, we should not upgrade STACK_INVALID to STACK_MISC when allow_ptr_leaks is false, since invalid contents shouldn't be read unless the program has the relevant capabilities. The relaxation only makes sense when env->allow_ptr_leaks is true. However, such conversion in privileged mode becomes unnecessary, as invalid slots can be read without being upgraded to STACK_MISC. Currently, the condition is inverted (i.e. checking for true instead of false), simply remove it to restore correct behavior. Fixes: eaf18febd6eb ("bpf: preserve STACK_ZERO slots on partial reg spills") Acked-by: Andrii Nakryiko Reported-by: Tao Lyu Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204044757.1483141-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2fd35465d650a..f18aad339de87 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1202,14 +1202,17 @@ static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack) /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which * case they are equivalent, or it's STACK_ZERO, in which case we preserve * more precise STACK_ZERO. - * Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take - * env->allow_ptr_leaks into account and force STACK_MISC, if necessary. + * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged + * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is + * unnecessary as both are considered equivalent when loading data and pruning, + * in case of unprivileged mode it will be incorrect to allow reads of invalid + * slots. */ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype) { if (*stype == STACK_ZERO) return; - if (env->allow_ptr_leaks && *stype == STACK_INVALID) + if (*stype == STACK_INVALID) return; *stype = STACK_MISC; } From b0e66977dc072906bb76555fb1a64261d7f63d0f Mon Sep 17 00:00:00 2001 From: Tao Lyu Date: Tue, 3 Dec 2024 20:47:54 -0800 Subject: [PATCH 245/366] bpf: Fix narrow scalar spill onto 64-bit spilled scalar slots When CAP_PERFMON and CAP_SYS_ADMIN (allow_ptr_leaks) are disabled, the verifier aims to reject partial overwrite on an 8-byte stack slot that contains a spilled pointer. However, in such a scenario, it rejects all partial stack overwrites as long as the targeted stack slot is a spilled register, because it does not check if the stack slot is a spilled pointer. Incomplete checks will result in the rejection of valid programs, which spill narrower scalar values onto scalar slots, as shown below. 0: R1=ctx() R10=fp0 ; asm volatile ( @ repro.bpf.c:679 0: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1 1: (62) *(u32 *)(r10 -8) = 1 attempt to corrupt spilled pointer on stack processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0. Fix this by expanding the check to not consider spilled scalar registers when rejecting the write into the stack. Previous discussion on this patch is at link [0]. [0]: https://lore.kernel.org/bpf/20240403202409.2615469-1-tao.lyu@epfl.ch Fixes: ab125ed3ec1c ("bpf: fix check for attempt to corrupt spilled pointer") Acked-by: Eduard Zingerman Acked-by: Andrii Nakryiko Signed-off-by: Tao Lyu Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204044757.1483141-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f18aad339de87..01fbef9576e00 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4703,6 +4703,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, */ if (!env->allow_ptr_leaks && is_spilled_reg(&state->stack[spi]) && + !is_spilled_scalar_reg(&state->stack[spi]) && size != BPF_REG_SIZE) { verbose(env, "attempt to corrupt spilled pointer on stack\n"); return -EACCES; From adfdd9c68566120debc622712888c4d084539081 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 3 Dec 2024 20:47:55 -0800 Subject: [PATCH 246/366] selftests/bpf: Introduce __caps_unpriv annotation for tests Add a __caps_unpriv annotation so that tests requiring specific capabilities while dropping the rest can conveniently specify them during selftest declaration instead of munging with capabilities at runtime from the testing binary. While at it, let us convert test_verifier_mtu to use this new support instead. Since we do not want to include linux/capability.h, we only defined the four main capabilities BPF subsystem deals with in bpf_misc.h for use in tests. If the user passes a CAP_SYS_NICE or anything else that's not defined in the header, capability parsing code will return a warning. Also reject strtol returning 0. CAP_CHOWN = 0 but we'll never need to use it, and strtol doesn't errno on failed conversion. Fail the test in such a case. The original diff for this idea is available at link [0]. [0]: https://lore.kernel.org/bpf/a1e48f5d9ae133e19adc6adf27e19d585e06bab4.camel@gmail.com Signed-off-by: Eduard Zingerman [ Kartikeya: rebase on bpf-next, add warn to parse_caps, convert test_verifier_mtu ] Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204044757.1483141-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 19 +------- tools/testing/selftests/bpf/progs/bpf_misc.h | 12 +++++ .../selftests/bpf/progs/verifier_mtu.c | 4 +- tools/testing/selftests/bpf/test_loader.c | 46 +++++++++++++++++++ 4 files changed, 62 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d9f65adb456b6..3ee40ee9413a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -225,24 +225,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } - -void test_verifier_mtu(void) -{ - __u64 caps = 0; - int ret; - - /* In case CAP_BPF and CAP_PERFMON is not set */ - ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); - if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) - return; - ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); - if (!ASSERT_OK(ret, "disable_cap_sys_admin")) - goto restore_cap; - RUN(verifier_mtu); -restore_cap: - if (caps) - cap_enable_effective(caps, NULL); -} +void test_verifier_mtu(void) { RUN(verifier_mtu); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index eccaf955e3947..f45f4352feebd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -5,6 +5,10 @@ #define XSTR(s) STR(s) #define STR(s) #s +/* Expand a macro and then stringize the expansion */ +#define QUOTE(str) #str +#define EXPAND_QUOTE(str) QUOTE(str) + /* This set of attributes controls behavior of the * test_loader.c:test_loader__run_subtests(). * @@ -106,6 +110,7 @@ * __arch_* Specify on which architecture the test case should be tested. * Several __arch_* annotations could be specified at once. * When test case is not run on current arch it is marked as skipped. + * __caps_unpriv Specify the capabilities that should be set when running the test. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) #define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg))) @@ -129,6 +134,13 @@ #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") +#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) + +/* Define common capabilities tested using __caps_unpriv */ +#define CAP_NET_ADMIN 12 +#define CAP_SYS_ADMIN 21 +#define CAP_PERFMON 38 +#define CAP_BPF 39 /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c index 70c7600a26a0a..4ccf1ebc42d16 100644 --- a/tools/testing/selftests/bpf/progs/verifier_mtu.c +++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c @@ -6,7 +6,9 @@ SEC("tc/ingress") __description("uninit/mtu: write rejected") -__failure __msg("invalid indirect read from stack") +__success +__caps_unpriv(CAP_BPF|CAP_NET_ADMIN) +__failure_unpriv __msg_unpriv("invalid indirect read from stack") int tc_uninit_mtu(struct __sk_buff *ctx) { __u32 mtu; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 3e9b009580d4e..53b06647cf57d 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -36,6 +36,7 @@ #define TEST_TAG_ARCH "comment:test_arch=" #define TEST_TAG_JITED_PFX "comment:test_jited=" #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" +#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -74,6 +75,7 @@ struct test_subspec { struct expected_msgs jited; int retval; bool execute; + __u64 caps; }; struct test_spec { @@ -276,6 +278,37 @@ static int parse_int(const char *str, int *val, const char *name) return 0; } +static int parse_caps(const char *str, __u64 *val, const char *name) +{ + int cap_flag = 0; + char *token = NULL, *saveptr = NULL; + + char *str_cpy = strdup(str); + if (str_cpy == NULL) { + PRINT_FAIL("Memory allocation failed\n"); + return -EINVAL; + } + + token = strtok_r(str_cpy, "|", &saveptr); + while (token != NULL) { + errno = 0; + if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) { + PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token); + return -EINVAL; + } + cap_flag = strtol(token, NULL, 10); + if (!cap_flag || errno) { + PRINT_FAIL("failed to parse caps %s\n", name); + return -EINVAL; + } + *val |= (1ULL << cap_flag); + token = strtok_r(NULL, "|", &saveptr); + } + + free(str_cpy); + return 0; +} + static int parse_retval(const char *str, int *val, const char *name) { struct { @@ -541,6 +574,12 @@ static int parse_test_spec(struct test_loader *tester, jit_on_next_line = true; } else if (str_has_pfx(s, TEST_BTF_PATH)) { spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; + } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) { + val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1; + err = parse_caps(val, &spec->unpriv.caps, "test caps"); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; } } @@ -917,6 +956,13 @@ void run_subtest(struct test_loader *tester, test__end_subtest(); return; } + if (subspec->caps) { + err = cap_enable_effective(subspec->caps, NULL); + if (err) { + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); + goto subtest_cleanup; + } + } } /* Implicitly reset to NULL if next test case doesn't specify */ From f513c3635078fc184af66b1af9e4f2b2d19b7d79 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 20:47:56 -0800 Subject: [PATCH 247/366] selftests/bpf: Add test for reading from STACK_INVALID slots Ensure that when CAP_PERFMON is dropped, and the verifier sees allow_ptr_leaks as false, we are not permitted to read from a STACK_INVALID slot. Without the fix, the test will report unexpected success in loading. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204044757.1483141-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 671d9f415dbf7..bab6d789ba007 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -1244,4 +1244,22 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void) : __clobber_all); } +SEC("socket") +__description("stack_noperfmon: reject read of invalid slots") +__success +__caps_unpriv(CAP_BPF) +__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8") +__naked void stack_noperfmon_reject_invalid_read(void) +{ + asm volatile (" \ + r2 = 1; \ + r6 = r10; \ + r6 += -8; \ + *(u8 *)(r6 + 0) = r2; \ + r2 = *(u64 *)(r6 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; From 19b6dbc006ecc1f2c8d7fa2d5afbfb7e7eba7920 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 20:47:57 -0800 Subject: [PATCH 248/366] selftests/bpf: Add test for narrow spill into 64-bit spilled scalar Add a test case to verify that without CAP_PERFMON, the test now succeeds instead of failing due to a verification error. Acked-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204044757.1483141-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index bab6d789ba007..1e5a511e8494a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -1262,4 +1262,21 @@ __naked void stack_noperfmon_reject_invalid_read(void) " ::: __clobber_all); } +SEC("socket") +__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots") +__success +__caps_unpriv(CAP_BPF) +__success_unpriv +__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void) +{ + asm volatile(" \ + r0 = 0; \ + *(u64 *)(r10 - 8) = r0; \ + *(u32 *)(r10 - 8) = r0; \ + exit; \ +" : + : + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; From 88c23a32b851e36adc4ab36f796d9b711f47e2bb Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 29 Nov 2024 15:17:06 +0100 Subject: [PATCH 249/366] nvme-fabrics: handle zero MAXCMD without closing the connection The NVMe specification states that MAXCMD is mandatory for NVMe-over-Fabrics implementations. However, some NVMe/TCP and NVMe/FC arrays from major vendors have buggy firmware that reports MAXCMD as zero in the Identify Controller data structure. Currently, the implementation closes the connection in such cases, completely preventing the host from connecting to the target. Fix the issue by printing a clear error message about the firmware bug and allowing the connection to proceed. It assumes that the target supports a MAXCMD value of SQSIZE + 1. If any issues arise, the user can manually adjust SQSIZE to mitigate them. Fixes: 4999568184e5 ("nvme-fabrics: check max outstanding commands") Signed-off-by: Maurizio Lombardi Reviewed-by: Laurence Oberman Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e5c9e15ad0ca..1e904a794a4f3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3233,8 +3233,9 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct } if (!ctrl->maxcmd) { - dev_err(ctrl->device, "Maximum outstanding commands is 0\n"); - return -EINVAL; + dev_warn(ctrl->device, + "Firmware bug: maximum outstanding commands is 0\n"); + ctrl->maxcmd = ctrl->sqsize + 1; } return 0; From 41d826c8a9de37af5d1710a37b55720bd5ad8709 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Sun, 1 Dec 2024 01:02:58 +0800 Subject: [PATCH 250/366] nvmet: replace kmalloc + memset with kzalloc for data allocation cocci warnings: (new ones prefixed by >>) >> drivers/nvme/target/pr.c:831:8-15: WARNING: kzalloc should be used for data, instead of kmalloc/memset The pattern of using 'kmalloc' followed by 'memset' is replaced with 'kzalloc', which is functionally equivalent to 'kmalloc' + 'memset', but more efficient. 'kzalloc' automatically zeroes the allocated memory, making it a faster and more streamlined solution. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411301434.LEckbcWx-lkp@intel.com/ Reviewed-by: Kuan-Wei Chiu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Yu-Chun Lin Signed-off-by: Keith Busch --- drivers/nvme/target/pr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/pr.c b/drivers/nvme/target/pr.c index 25a02b50d9f3d..90e9f5bbe5815 100644 --- a/drivers/nvme/target/pr.c +++ b/drivers/nvme/target/pr.c @@ -828,12 +828,11 @@ static void nvmet_execute_pr_report(struct nvmet_req *req) goto out; } - data = kmalloc(num_bytes, GFP_KERNEL); + data = kzalloc(num_bytes, GFP_KERNEL); if (!data) { status = NVME_SC_INTERNAL; goto out; } - memset(data, 0, num_bytes); data->gen = cpu_to_le32(atomic_read(&pr->generation)); data->ptpls = 0; ctrl_eds = data->regctl_eds; From 367ac16e5ff2dcd6b7f00a8f94e6ba98875cb397 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:01 +0530 Subject: [PATCH 251/366] scsi: mpi3mr: Synchronize access to ioctl data buffer The driver serializes ioctls through a mutex lock but access to the ioctl data buffer is not guarded by the mutex. This results in multiple user threads being able to write to the driver's ioctl buffer simultaneously. Protect the ioctl buffer with the ioctl mutex. Signed-off-by: Sumit Saxena Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-2-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 36 ++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 01f035f9330e4..10b8e4dc64f8b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -2329,6 +2329,15 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if (!mrioc) return -ENODEV; + if (mutex_lock_interruptible(&mrioc->bsg_cmds.mutex)) + return -ERESTARTSYS; + + if (mrioc->bsg_cmds.state & MPI3MR_CMD_PENDING) { + dprint_bsg_err(mrioc, "%s: command is in use\n", __func__); + mutex_unlock(&mrioc->bsg_cmds.mutex); + return -EAGAIN; + } + if (!mrioc->ioctl_sges_allocated) { dprint_bsg_err(mrioc, "%s: DMA memory was not allocated\n", __func__); @@ -2339,13 +2348,16 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) karg->timeout = MPI3MR_APP_DEFAULT_TIMEOUT; mpi_req = kzalloc(MPI3MR_ADMIN_REQ_FRAME_SZ, GFP_KERNEL); - if (!mpi_req) + if (!mpi_req) { + mutex_unlock(&mrioc->bsg_cmds.mutex); return -ENOMEM; + } mpi_header = (struct mpi3_request_header *)mpi_req; bufcnt = karg->buf_entry_list.num_of_entries; drv_bufs = kzalloc((sizeof(*drv_bufs) * bufcnt), GFP_KERNEL); if (!drv_bufs) { + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -ENOMEM; goto out; } @@ -2353,6 +2365,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) dout_buf = kzalloc(job->request_payload.payload_len, GFP_KERNEL); if (!dout_buf) { + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -ENOMEM; goto out; } @@ -2360,6 +2373,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) din_buf = kzalloc(job->reply_payload.payload_len, GFP_KERNEL); if (!din_buf) { + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -ENOMEM; goto out; } @@ -2435,6 +2449,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) (mpi_msg_size > MPI3MR_ADMIN_REQ_FRAME_SZ)) { dprint_bsg_err(mrioc, "%s: invalid MPI message size\n", __func__); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2447,6 +2462,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if (invalid_be) { dprint_bsg_err(mrioc, "%s: invalid buffer entries passed\n", __func__); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2454,12 +2470,14 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if (sgl_dout_iter > (dout_buf + job->request_payload.payload_len)) { dprint_bsg_err(mrioc, "%s: data_out buffer length mismatch\n", __func__); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } if (sgl_din_iter > (din_buf + job->reply_payload.payload_len)) { dprint_bsg_err(mrioc, "%s: data_in buffer length mismatch\n", __func__); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2472,6 +2490,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) dprint_bsg_err(mrioc, "%s:%d: invalid data transfer size passed for function 0x%x din_size = %d, dout_size = %d\n", __func__, __LINE__, mpi_header->function, din_size, dout_size); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2480,6 +2499,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) dprint_bsg_err(mrioc, "%s:%d: invalid data transfer size passed for function 0x%x din_size=%d\n", __func__, __LINE__, mpi_header->function, din_size); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2487,6 +2507,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) dprint_bsg_err(mrioc, "%s:%d: invalid data transfer size passed for function 0x%x dout_size = %d\n", __func__, __LINE__, mpi_header->function, dout_size); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2497,6 +2518,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) dprint_bsg_err(mrioc, "%s:%d: invalid message size passed:%d:%d:%d:%d\n", __func__, __LINE__, din_cnt, dout_cnt, din_size, dout_size); + mutex_unlock(&mrioc->bsg_cmds.mutex); rval = -EINVAL; goto out; } @@ -2544,6 +2566,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) continue; if (mpi3mr_map_data_buffer_dma(mrioc, drv_buf_iter, desc_count)) { rval = -ENOMEM; + mutex_unlock(&mrioc->bsg_cmds.mutex); dprint_bsg_err(mrioc, "%s:%d: mapping data buffers failed\n", __func__, __LINE__); goto out; @@ -2556,20 +2579,11 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) sense_buff_k = kzalloc(erbsz, GFP_KERNEL); if (!sense_buff_k) { rval = -ENOMEM; + mutex_unlock(&mrioc->bsg_cmds.mutex); goto out; } } - if (mutex_lock_interruptible(&mrioc->bsg_cmds.mutex)) { - rval = -ERESTARTSYS; - goto out; - } - if (mrioc->bsg_cmds.state & MPI3MR_CMD_PENDING) { - rval = -EAGAIN; - dprint_bsg_err(mrioc, "%s: command is in use\n", __func__); - mutex_unlock(&mrioc->bsg_cmds.mutex); - goto out; - } if (mrioc->unrecoverable) { dprint_bsg_err(mrioc, "%s: unrecoverable controller\n", __func__); From 711201a8b8334a397440ac0b859df0054e174bc9 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:02 +0530 Subject: [PATCH 252/366] scsi: mpi3mr: Fix corrupt config pages PHY state is switched in sysfs The driver, through the SAS transport, exposes a sysfs interface to enable/disable PHYs in a controller/expander setup. When multiple PHYs are disabled and enabled in rapid succession, the persistent and current config pages related to SAS IO unit/SAS Expander pages could get corrupted. Use separate memory for each config request. Signed-off-by: Prayas Patel Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-3-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr.h | 9 ---- drivers/scsi/mpi3mr/mpi3mr_fw.c | 81 ++++++--------------------------- 2 files changed, 13 insertions(+), 77 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 81bb408ce56d8..1e715fd65a7d4 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -134,8 +134,6 @@ extern atomic64_t event_counter; #define MPI3MR_WATCHDOG_INTERVAL 1000 /* in milli seconds */ -#define MPI3MR_DEFAULT_CFG_PAGE_SZ 1024 /* in bytes */ - #define MPI3MR_RESET_TOPOLOGY_SETTLE_TIME 10 #define MPI3MR_SCMD_TIMEOUT (60 * HZ) @@ -1133,9 +1131,6 @@ struct scmd_priv { * @io_throttle_low: I/O size to stop throttle in 512b blocks * @num_io_throttle_group: Maximum number of throttle groups * @throttle_groups: Pointer to throttle group info structures - * @cfg_page: Default memory for configuration pages - * @cfg_page_dma: Configuration page DMA address - * @cfg_page_sz: Default configuration page memory size * @sas_transport_enabled: SAS transport enabled or not * @scsi_device_channel: Channel ID for SCSI devices * @transport_cmds: Command tracker for SAS transport commands @@ -1332,10 +1327,6 @@ struct mpi3mr_ioc { u16 num_io_throttle_group; struct mpi3mr_throttle_group_info *throttle_groups; - void *cfg_page; - dma_addr_t cfg_page_dma; - u16 cfg_page_sz; - u8 sas_transport_enabled; u8 scsi_device_channel; struct mpi3mr_drv_cmd transport_cmds; diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index f1ab76351bd81..2e6245bd4282e 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -4186,17 +4186,6 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc) mpi3mr_read_tsu_interval(mrioc); mpi3mr_print_ioc_info(mrioc); - if (!mrioc->cfg_page) { - dprint_init(mrioc, "allocating config page buffers\n"); - mrioc->cfg_page_sz = MPI3MR_DEFAULT_CFG_PAGE_SZ; - mrioc->cfg_page = dma_alloc_coherent(&mrioc->pdev->dev, - mrioc->cfg_page_sz, &mrioc->cfg_page_dma, GFP_KERNEL); - if (!mrioc->cfg_page) { - retval = -1; - goto out_failed_noretry; - } - } - dprint_init(mrioc, "allocating host diag buffers\n"); mpi3mr_alloc_diag_bufs(mrioc); @@ -4768,11 +4757,7 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc) mrioc->admin_req_base, mrioc->admin_req_dma); mrioc->admin_req_base = NULL; } - if (mrioc->cfg_page) { - dma_free_coherent(&mrioc->pdev->dev, mrioc->cfg_page_sz, - mrioc->cfg_page, mrioc->cfg_page_dma); - mrioc->cfg_page = NULL; - } + if (mrioc->pel_seqnum_virt) { dma_free_coherent(&mrioc->pdev->dev, mrioc->pel_seqnum_sz, mrioc->pel_seqnum_virt, mrioc->pel_seqnum_dma); @@ -5392,55 +5377,6 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc, return retval; } - -/** - * mpi3mr_free_config_dma_memory - free memory for config page - * @mrioc: Adapter instance reference - * @mem_desc: memory descriptor structure - * - * Check whether the size of the buffer specified by the memory - * descriptor is greater than the default page size if so then - * free the memory pointed by the descriptor. - * - * Return: Nothing. - */ -static void mpi3mr_free_config_dma_memory(struct mpi3mr_ioc *mrioc, - struct dma_memory_desc *mem_desc) -{ - if ((mem_desc->size > mrioc->cfg_page_sz) && mem_desc->addr) { - dma_free_coherent(&mrioc->pdev->dev, mem_desc->size, - mem_desc->addr, mem_desc->dma_addr); - mem_desc->addr = NULL; - } -} - -/** - * mpi3mr_alloc_config_dma_memory - Alloc memory for config page - * @mrioc: Adapter instance reference - * @mem_desc: Memory descriptor to hold dma memory info - * - * This function allocates new dmaable memory or provides the - * default config page dmaable memory based on the memory size - * described by the descriptor. - * - * Return: 0 on success, non-zero on failure. - */ -static int mpi3mr_alloc_config_dma_memory(struct mpi3mr_ioc *mrioc, - struct dma_memory_desc *mem_desc) -{ - if (mem_desc->size > mrioc->cfg_page_sz) { - mem_desc->addr = dma_alloc_coherent(&mrioc->pdev->dev, - mem_desc->size, &mem_desc->dma_addr, GFP_KERNEL); - if (!mem_desc->addr) - return -ENOMEM; - } else { - mem_desc->addr = mrioc->cfg_page; - mem_desc->dma_addr = mrioc->cfg_page_dma; - memset(mem_desc->addr, 0, mrioc->cfg_page_sz); - } - return 0; -} - /** * mpi3mr_post_cfg_req - Issue config requests and wait * @mrioc: Adapter instance reference @@ -5596,8 +5532,12 @@ static int mpi3mr_process_cfg_req(struct mpi3mr_ioc *mrioc, cfg_req->page_length = cfg_hdr->page_length; cfg_req->page_version = cfg_hdr->page_version; } - if (mpi3mr_alloc_config_dma_memory(mrioc, &mem_desc)) - goto out; + + mem_desc.addr = dma_alloc_coherent(&mrioc->pdev->dev, + mem_desc.size, &mem_desc.dma_addr, GFP_KERNEL); + + if (!mem_desc.addr) + return retval; mpi3mr_add_sg_single(&cfg_req->sgl, sgl_flags, mem_desc.size, mem_desc.dma_addr); @@ -5626,7 +5566,12 @@ static int mpi3mr_process_cfg_req(struct mpi3mr_ioc *mrioc, } out: - mpi3mr_free_config_dma_memory(mrioc, &mem_desc); + if (mem_desc.addr) { + dma_free_coherent(&mrioc->pdev->dev, mem_desc.size, + mem_desc.addr, mem_desc.dma_addr); + mem_desc.addr = NULL; + } + return retval; } From 0d32014f1e3e7a7adf1583c45387f26b9bb3a49d Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:03 +0530 Subject: [PATCH 253/366] scsi: mpi3mr: Start controller indexing from 0 Instead of displaying the controller index starting from '1' make the driver display the controller index starting from '0'. Signed-off-by: Sumit Saxena Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-4-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 5f2f67acf8bf3..1bef88130d0c0 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5215,7 +5215,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) } mrioc = shost_priv(shost); - retval = ida_alloc_range(&mrioc_ida, 1, U8_MAX, GFP_KERNEL); + retval = ida_alloc_range(&mrioc_ida, 0, U8_MAX, GFP_KERNEL); if (retval < 0) goto id_alloc_failed; mrioc->id = (u8)retval; From fb6eb98f3965e2ee92cbcb466051d2f2acf552d1 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:04 +0530 Subject: [PATCH 254/366] scsi: mpi3mr: Handling of fault code for insufficient power Before retrying initialization, check and abort if the fault code indicates insufficient power. Also mark the controller as unrecoverable instead of issuing reset in the watch dog timer if the fault code indicates insufficient power. Signed-off-by: Prayas Patel Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-5-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 2e6245bd4282e..5ed31fe57474a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1035,6 +1035,36 @@ static const char *mpi3mr_reset_type_name(u16 reset_type) return name; } +/** + * mpi3mr_is_fault_recoverable - Read fault code and decide + * whether the controller can be recoverable + * @mrioc: Adapter instance reference + * Return: true if fault is recoverable, false otherwise. + */ +static inline bool mpi3mr_is_fault_recoverable(struct mpi3mr_ioc *mrioc) +{ + u32 fault; + + fault = (readl(&mrioc->sysif_regs->fault) & + MPI3_SYSIF_FAULT_CODE_MASK); + + switch (fault) { + case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED: + case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED: + ioc_warn(mrioc, + "controller requires system power cycle, marking controller as unrecoverable\n"); + return false; + case MPI3_SYSIF_FAULT_CODE_INSUFFICIENT_PCI_SLOT_POWER: + ioc_warn(mrioc, + "controller faulted due to insufficient power,\n" + " try by connecting it to a different slot\n"); + return false; + default: + break; + } + return true; +} + /** * mpi3mr_print_fault_info - Display fault information * @mrioc: Adapter instance reference @@ -1373,6 +1403,11 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) ioc_info(mrioc, "ioc_status(0x%08x), ioc_config(0x%08x), ioc_info(0x%016llx) at the bringup\n", ioc_status, ioc_config, base_info); + if (!mpi3mr_is_fault_recoverable(mrioc)) { + mrioc->unrecoverable = 1; + goto out_device_not_present; + } + /*The timeout value is in 2sec unit, changing it to seconds*/ mrioc->ready_timeout = ((base_info & MPI3_SYSIF_IOC_INFO_LOW_TIMEOUT_MASK) >> @@ -2734,6 +2769,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work) mpi3mr_print_fault_info(mrioc); mrioc->diagsave_timeout = 0; + if (!mpi3mr_is_fault_recoverable(mrioc)) { + mrioc->unrecoverable = 1; + goto schedule_work; + } + switch (trigger_data.fault) { case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED: case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED: From 0deb37c2f42a54610dd3569b01413378f430bfea Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:05 +0530 Subject: [PATCH 255/366] scsi: mpi3mr: Update driver version to 8.12.0.3.50 Update driver version to 8.12.0.3.50. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-6-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 1e715fd65a7d4..0c3e1ac076b5f 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -57,8 +57,8 @@ extern struct list_head mrioc_list; extern int prot_mask; extern atomic64_t event_counter; -#define MPI3MR_DRIVER_VERSION "8.12.0.0.50" -#define MPI3MR_DRIVER_RELDATE "05-Sept-2024" +#define MPI3MR_DRIVER_VERSION "8.12.0.3.50" +#define MPI3MR_DRIVER_RELDATE "11-November-2024" #define MPI3MR_DRIVER_NAME "mpi3mr" #define MPI3MR_DRIVER_LICENSE "GPL" From ad0cf42e1fc4810170a8e8e232e85d69073e4d25 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Dec 2024 09:42:40 +0900 Subject: [PATCH 256/366] nvme-pci: don't use dma_alloc_noncontiguous with 0 merge boundary Only call into nvme_alloc_host_mem_single which uses dma_alloc_noncontiguous when there is non-null dma merge boundary. Without this we'll call into dma_alloc_noncontiguous for device using dma-direct, which can work fine as long as the preferred size is below the MAX_ORDER of the page allocator, but blows up with a warning if it is too large. Fixes: 63a5c7a4b4c4 ("nvme-pci: use dma_alloc_noncontigous if possible") Reported-by: Leon Romanovsky Reported-by: Chaitanya Kumar Borah Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky Tested-by: Chaitanya Kumar Borah Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9535e35ef18a5..1a5ba80f1811a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2172,6 +2172,7 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) { + unsigned long dma_merge_boundary = dma_get_merge_boundary(dev->dev); u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); u64 chunk_size; @@ -2180,7 +2181,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) * If there is an IOMMU that can merge pages, try a virtually * non-contiguous allocation for a single segment first. */ - if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) { + if (dma_merge_boundary && (PAGE_SIZE & dma_merge_boundary) == 0) { if (!nvme_alloc_host_mem_single(dev, preferred)) return 0; } From c423263082ee8ccfad59ab33e3d5da5dc004c21e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 15 Nov 2024 18:33:07 +0530 Subject: [PATCH 257/366] scsi: qla2xxx: Fix abort in bsg timeout Current abort of bsg on timeout prematurely clears the outstanding_cmds[]. Abort does not allow FW to return the IOCB/SRB. In addition, bsg_job_done() is not called to return the BSG (i.e. leak). Abort the outstanding bsg/SRB and wait for the completion. The completion IOCB will wake up the bsg_timeout thread. If abort is not successful, then driver will forcibly call bsg_job_done() and free the srb. Err Inject: - qaucli -z - assign CT Passthru IOCB's NportHandle with another initiator nport handle to trigger timeout. Remote port will drop CT request. - bsg_job_done is properly called as part of cleanup kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject. kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa. kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f0838 msgcode 80000004 vendor cmd fa010000 kernel: qla2xxx [0000:21:00.1]-507c:7: Abort command issued - hdl=4b, type=5 kernel: qla2xxx [0000:21:00.1]-5040:7: ELS-CT pass-through-ct pass-through error hdl=4b comp_status-status=0x5 error subcode 1=0x0 error subcode 2=0xaf882e80. kernel: qla2xxx [0000:21:00.1]-7009:7: qla2x00_bsg_job_done: sp hdl 4b, result=70000 bsg ptr ffff9971a42f0838 kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=4b rval=0 kernel: qla2xxx [0000:21:00.1]-708a:7: bsg abort success. bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=0x4b kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject. kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa. kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f43b8 msgcode 80000004 vendor cmd fa010000 kernel: qla2xxx [0000:21:00.1]-7012:7: qla_bsg_found : 2206 : Error Inject 2. kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f43b8 sp=ffff99762c304440 handle=5e rval=5 kernel: qla2xxx [0000:21:00.1]-704f:7: bsg abort fail. bsg=ffff9971a42f43b8 sp=ffff99762c304440 rval=5. kernel: qla2xxx [0000:21:00.1]-7051:7: qla_bsg_found bsg_job_done : bsg ffff9971a42f43b8 result 0xfffffffa sp ffff99762c304440. Cc: stable@vger.kernel.org Fixes: c449b4198701 ("scsi: qla2xxx: Use QP lock to search for bsg") Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 114 ++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 52dc9604f5674..981ac1986cbe3 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -24,6 +24,7 @@ void qla2x00_bsg_job_done(srb_t *sp, int res) { struct bsg_job *bsg_job = sp->u.bsg_job; struct fc_bsg_reply *bsg_reply = bsg_job->reply; + struct completion *comp = sp->comp; ql_dbg(ql_dbg_user, sp->vha, 0x7009, "%s: sp hdl %x, result=%x bsg ptr %p\n", @@ -35,6 +36,9 @@ void qla2x00_bsg_job_done(srb_t *sp, int res) bsg_reply->result = res; bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); + + if (comp) + complete(comp); } void qla2x00_bsg_sp_free(srb_t *sp) @@ -3061,7 +3065,7 @@ qla24xx_bsg_request(struct bsg_job *bsg_job) static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job) { - bool found = false; + bool found, do_bsg_done; struct fc_bsg_reply *bsg_reply = bsg_job->reply; scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); struct qla_hw_data *ha = vha->hw; @@ -3069,6 +3073,11 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job) int cnt; unsigned long flags; struct req_que *req; + int rval; + DECLARE_COMPLETION_ONSTACK(comp); + uint32_t ratov_j; + + found = do_bsg_done = false; spin_lock_irqsave(qpair->qp_lock_ptr, flags); req = qpair->req; @@ -3080,42 +3089,104 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job) sp->type == SRB_ELS_CMD_HST || sp->type == SRB_ELS_CMD_HST_NOLOGIN) && sp->u.bsg_job == bsg_job) { - req->outstanding_cmds[cnt] = NULL; - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - - if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { - ql_log(ql_log_warn, vha, 0x7089, - "mbx abort_command failed.\n"); - bsg_reply->result = -EIO; - } else { - ql_dbg(ql_dbg_user, vha, 0x708a, - "mbx abort_command success.\n"); - bsg_reply->result = 0; - } - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); found = true; - goto done; + sp->comp = ∁ + break; } } spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); -done: - return found; + if (!found) + return false; + + if (ha->flags.eeh_busy) { + /* skip over abort. EEH handling will return the bsg. Wait for it */ + rval = QLA_SUCCESS; + ql_dbg(ql_dbg_user, vha, 0x802c, + "eeh encounter. bsg %p sp=%p handle=%x \n", + bsg_job, sp, sp->handle); + } else { + rval = ha->isp_ops->abort_command(sp); + ql_dbg(ql_dbg_user, vha, 0x802c, + "Aborting bsg %p sp=%p handle=%x rval=%x\n", + bsg_job, sp, sp->handle, rval); + } + + switch (rval) { + case QLA_SUCCESS: + /* Wait for the command completion. */ + ratov_j = ha->r_a_tov / 10 * 4 * 1000; + ratov_j = msecs_to_jiffies(ratov_j); + + if (!wait_for_completion_timeout(&comp, ratov_j)) { + ql_log(ql_log_info, vha, 0x7089, + "bsg abort timeout. bsg=%p sp=%p handle %#x .\n", + bsg_job, sp, sp->handle); + + do_bsg_done = true; + } else { + /* fw had returned the bsg */ + ql_dbg(ql_dbg_user, vha, 0x708a, + "bsg abort success. bsg %p sp=%p handle=%#x\n", + bsg_job, sp, sp->handle); + do_bsg_done = false; + } + break; + default: + ql_log(ql_log_info, vha, 0x704f, + "bsg abort fail. bsg=%p sp=%p rval=%x.\n", + bsg_job, sp, rval); + + do_bsg_done = true; + break; + } + + if (!do_bsg_done) + return true; + + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + /* + * recheck to make sure it's still the same bsg_job due to + * qp_lock_ptr was released earlier. + */ + if (req->outstanding_cmds[cnt] && + req->outstanding_cmds[cnt]->u.bsg_job != bsg_job) { + /* fw had returned the bsg */ + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + return true; + } + req->outstanding_cmds[cnt] = NULL; + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + + /* ref: INIT */ + sp->comp = NULL; + kref_put(&sp->cmd_kref, qla2x00_sp_release); + bsg_reply->result = -ENXIO; + bsg_reply->reply_payload_rcv_len = 0; + + ql_dbg(ql_dbg_user, vha, 0x7051, + "%s bsg_job_done : bsg %p result %#x sp %p.\n", + __func__, bsg_job, bsg_reply->result, sp); + + bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); + + return true; } int qla24xx_bsg_timeout(struct bsg_job *bsg_job) { - struct fc_bsg_reply *bsg_reply = bsg_job->reply; + struct fc_bsg_request *bsg_request = bsg_job->request; scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); struct qla_hw_data *ha = vha->hw; int i; struct qla_qpair *qpair; - ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n", - __func__, bsg_job); + ql_log(ql_log_info, vha, 0x708b, + "%s CMD timeout. bsg ptr %p msgcode %x vendor cmd %x\n", + __func__, bsg_job, bsg_request->msgcode, + bsg_request->rqst_data.h_vendor.vendor_cmd[0]); if (qla2x00_isp_reg_stat(ha)) { ql_log(ql_log_info, vha, 0x9007, @@ -3136,7 +3207,6 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) } ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n"); - bsg_reply->result = -ENXIO; done: return 0; From 07c903db0a2ff84b68efa1a74a4de353ea591eb0 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 15 Nov 2024 18:33:08 +0530 Subject: [PATCH 258/366] scsi: qla2xxx: Fix use after free on unload System crash is observed with stack trace warning of use after free. There are 2 signals to tell dpc_thread to terminate (UNLOADING flag and kthread_stop). On setting the UNLOADING flag when dpc_thread happens to run at the time and sees the flag, this causes dpc_thread to exit and clean up itself. When kthread_stop is called for final cleanup, this causes use after free. Remove UNLOADING signal to terminate dpc_thread. Use the kthread_stop as the main signal to exit dpc_thread. [596663.812935] kernel BUG at mm/slub.c:294! [596663.812950] invalid opcode: 0000 [#1] SMP PTI [596663.812957] CPU: 13 PID: 1475935 Comm: rmmod Kdump: loaded Tainted: G IOE --------- - - 4.18.0-240.el8.x86_64 #1 [596663.812960] Hardware name: HP ProLiant DL380p Gen8, BIOS P70 08/20/2012 [596663.812974] RIP: 0010:__slab_free+0x17d/0x360 ... [596663.813008] Call Trace: [596663.813022] ? __dentry_kill+0x121/0x170 [596663.813030] ? _cond_resched+0x15/0x30 [596663.813034] ? _cond_resched+0x15/0x30 [596663.813039] ? wait_for_completion+0x35/0x190 [596663.813048] ? try_to_wake_up+0x63/0x540 [596663.813055] free_task+0x5a/0x60 [596663.813061] kthread_stop+0xf3/0x100 [596663.813103] qla2x00_remove_one+0x284/0x440 [qla2xxx] Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7f980e6141c28..7ab717ed72327 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6902,12 +6902,15 @@ qla2x00_do_dpc(void *data) set_user_nice(current, MIN_NICE); set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { + while (1) { ql_dbg(ql_dbg_dpc, base_vha, 0x4000, "DPC handler sleeping.\n"); schedule(); + if (kthread_should_stop()) + break; + if (test_and_clear_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags)) qla_pci_set_eeh_busy(base_vha); @@ -6920,15 +6923,16 @@ qla2x00_do_dpc(void *data) goto end_loop; } + if (test_bit(UNLOADING, &base_vha->dpc_flags)) + /* don't do any work. Wait to be terminated by kthread_stop */ + goto end_loop; + ha->dpc_active = 1; ql_dbg(ql_dbg_dpc + ql_dbg_verbose, base_vha, 0x4001, "DPC handler waking up, dpc_flags=0x%lx.\n", base_vha->dpc_flags); - if (test_bit(UNLOADING, &base_vha->dpc_flags)) - break; - if (IS_P3P_TYPE(ha)) { if (IS_QLA8044(ha)) { if (test_and_clear_bit(ISP_UNRECOVERABLE, @@ -7241,9 +7245,6 @@ qla2x00_do_dpc(void *data) */ ha->dpc_active = 0; - /* Cleanup any residual CTX SRBs. */ - qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16); - return 0; } From fec55c29e54d3ca6fe9d7d7d9266098b4514fd34 Mon Sep 17 00:00:00 2001 From: "Chunguang.xu" Date: Tue, 3 Dec 2024 11:34:40 +0800 Subject: [PATCH 259/366] nvme-tcp: fix the memleak while create new ctrl failed Now while we create new ctrl failed, we have not free the tagset occupied by admin_q, here try to fix it. Fixes: fd1418de10b9 ("nvme-tcp: avoid open-coding nvme_tcp_teardown_admin_queue()") Signed-off-by: Chunguang.xu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3e416af2659f1..55abfe5e1d254 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2278,7 +2278,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) } destroy_admin: nvme_stop_keep_alive(ctrl); - nvme_tcp_teardown_admin_queue(ctrl, false); + nvme_tcp_teardown_admin_queue(ctrl, new); return ret; } From 5858b687559809f05393af745cbadf06dee61295 Mon Sep 17 00:00:00 2001 From: "Chunguang.xu" Date: Tue, 3 Dec 2024 11:34:41 +0800 Subject: [PATCH 260/366] nvme-rdma: unquiesce admin_q before destroy it Kernel will hang on destroy admin_q while we create ctrl failed, such as following calltrace: PID: 23644 TASK: ff2d52b40f439fc0 CPU: 2 COMMAND: "nvme" #0 [ff61d23de260fb78] __schedule at ffffffff8323bc15 #1 [ff61d23de260fc08] schedule at ffffffff8323c014 #2 [ff61d23de260fc28] blk_mq_freeze_queue_wait at ffffffff82a3dba1 #3 [ff61d23de260fc78] blk_freeze_queue at ffffffff82a4113a #4 [ff61d23de260fc90] blk_cleanup_queue at ffffffff82a33006 #5 [ff61d23de260fcb0] nvme_rdma_destroy_admin_queue at ffffffffc12686ce #6 [ff61d23de260fcc8] nvme_rdma_setup_ctrl at ffffffffc1268ced #7 [ff61d23de260fd28] nvme_rdma_create_ctrl at ffffffffc126919b #8 [ff61d23de260fd68] nvmf_dev_write at ffffffffc024f362 #9 [ff61d23de260fe38] vfs_write at ffffffff827d5f25 RIP: 00007fda7891d574 RSP: 00007ffe2ef06958 RFLAGS: 00000202 RAX: ffffffffffffffda RBX: 000055e8122a4d90 RCX: 00007fda7891d574 RDX: 000000000000012b RSI: 000055e8122a4d90 RDI: 0000000000000004 RBP: 00007ffe2ef079c0 R8: 000000000000012b R9: 000055e8122a4d90 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000004 R13: 000055e8122923c0 R14: 000000000000012b R15: 00007fda78a54500 ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b This due to we have quiesced admi_q before cancel requests, but forgot to unquiesce before destroy it, as a result we fail to drain the pending requests, and hang on blk_mq_freeze_queue_wait() forever. Here try to reuse nvme_rdma_teardown_admin_queue() to fix this issue and simplify the code. Fixes: 958dc1d32c80 ("nvme-rdma: add clean action for failed reconnection") Reported-by: Yingfu.zhou Signed-off-by: Chunguang.xu Signed-off-by: Yue.zhao Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index baf7d24901528..86a2891d9bcc7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1091,13 +1091,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) } destroy_admin: nvme_stop_keep_alive(&ctrl->ctrl); - nvme_quiesce_admin_queue(&ctrl->ctrl); - blk_sync_queue(ctrl->ctrl.admin_q); - nvme_rdma_stop_queue(&ctrl->queues[0]); - nvme_cancel_admin_tagset(&ctrl->ctrl); - if (new) - nvme_remove_admin_tag_set(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_teardown_admin_queue(ctrl, new); return ret; } From fdc5664c690f6e0ee235dbc6b1f17ec59aca55bd Mon Sep 17 00:00:00 2001 From: "Chunguang.xu" Date: Tue, 3 Dec 2024 11:34:42 +0800 Subject: [PATCH 261/366] nvme-tcp: no need to quiesce admin_q in nvme_tcp_teardown_io_queues() As we quiesce admin_q in nvme_tcp_teardown_admin_queue(), so we should no need to quiesce it in nvme_tcp_reaardown_io_queues(), make things simple. Signed-off-by: Chunguang.xu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 55abfe5e1d254..98bf758dc6fc6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2178,7 +2178,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, { if (ctrl->queue_count <= 1) return; - nvme_quiesce_admin_queue(ctrl); nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); From b4e12f5728ff963ca5590c48b85a20d076bf517d Mon Sep 17 00:00:00 2001 From: "Chunguang.xu" Date: Tue, 3 Dec 2024 11:34:43 +0800 Subject: [PATCH 262/366] nvme-tcp: simplify nvme_tcp_teardown_io_queues() As nvme_tcp_teardown_io_queues() is the only one caller of nvme_tcp_destroy_admin_queue(), so we can merge it into nvme_tcp_teardown_io_queues() to simplify the code. Signed-off-by: Chunguang.xu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 98bf758dc6fc6..28c76a3e1bd22 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2101,14 +2101,6 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return ret; } -static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) -{ - nvme_tcp_stop_queue(ctrl, 0); - if (remove) - nvme_remove_admin_tag_set(ctrl); - nvme_tcp_free_admin_queue(ctrl); -} - static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) { int error; @@ -2163,9 +2155,11 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); - if (remove) + if (remove) { nvme_unquiesce_admin_queue(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(ctrl); + } + nvme_tcp_free_admin_queue(ctrl); if (ctrl->tls_pskid) { dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n", ctrl->tls_pskid); From 833c70e212fc40d3e98da941796f4c7bcaecdf58 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Fri, 15 Nov 2024 18:33:10 +0530 Subject: [PATCH 263/366] scsi: qla2xxx: Remove check req_sg_cnt should be equal to rsp_sg_cnt Firmware supports multiple sg_cnt for request and response for CT commands, so remove the redundant check. A check is there where sg_cnt for request and response should be same. This is not required as driver and FW have code to handle multiple and different sg_cnt on request and response. Cc: stable@vger.kernel.org Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 981ac1986cbe3..10431a67d202b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -494,16 +494,6 @@ qla2x00_process_ct(struct bsg_job *bsg_job) goto done; } - if ((req_sg_cnt != bsg_job->request_payload.sg_cnt) || - (rsp_sg_cnt != bsg_job->reply_payload.sg_cnt)) { - ql_log(ql_log_warn, vha, 0x7011, - "request_sg_cnt: %x dma_request_sg_cnt: %x reply_sg_cnt:%x " - "dma_reply_sg_cnt: %x\n", bsg_job->request_payload.sg_cnt, - req_sg_cnt, bsg_job->reply_payload.sg_cnt, rsp_sg_cnt); - rval = -EAGAIN; - goto done_unmap_sg; - } - if (!vha->flags.online) { ql_log(ql_log_warn, vha, 0x7012, "Host is not online.\n"); From 4812b7796c144f63a1094f79a5eb8fbdad8d7ebc Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 15 Nov 2024 18:33:11 +0530 Subject: [PATCH 264/366] scsi: qla2xxx: Fix NVMe and NPIV connect issue NVMe controller fails to send connect command due to failure to locate hw context buffer for NVMe queue 0 (blk_mq_hw_ctx, hctx_idx=0). The cause of the issue is NPIV host did not initialize the vha->irq_offset field. This field is given to blk-mq (blk_mq_pci_map_queues) to help locate the beginning of IO Queues which in turn help locate NVMe queue 0. Initialize this field to allow NVMe to work properly with NPIV host. kernel: nvme nvme5: Connect command failed, errno: -18 kernel: nvme nvme5: qid 0: secure concatenation is not supported kernel: nvme nvme5: NVME-FC{5}: create_assoc failed, assoc_id 2e9100 ret 401 kernel: nvme nvme5: NVME-FC{5}: reset: Reconnect attempt failed (401) kernel: nvme nvme5: NVME-FC{5}: Reconnect attempt in 2 seconds Cc: stable@vger.kernel.org Fixes: f0783d43dde4 ("scsi: qla2xxx: Use correct number of vectors for online CPUs") Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_mid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 76703f2706b8e..79879c4743e6d 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -506,6 +506,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport) return(NULL); } + vha->irq_offset = QLA_BASE_VECTORS; host = vha->host; fc_vport->dd_data = vha; /* New host info */ From e4e268f898c8a08f0a1188677e15eadbc06e98f6 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Fri, 15 Nov 2024 18:33:12 +0530 Subject: [PATCH 265/366] scsi: qla2xxx: Supported speed displayed incorrectly for VPorts The fc_function_template for vports was missing the .show_host_supported_speeds. The base port had the same. Add .show_host_supported_speeds to the vport template as well. Cc: stable@vger.kernel.org Fixes: 2c3dfe3f6ad8 ("[SCSI] qla2xxx: add support for NPIV") Signed-off-by: Anil Gurumurthy Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 2810608acd963..e6ece30c43486 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -3304,6 +3304,7 @@ struct fc_function_template qla2xxx_transport_vport_functions = { .show_host_node_name = 1, .show_host_port_name = 1, .show_host_supported_classes = 1, + .show_host_supported_speeds = 1, .get_host_port_id = qla2x00_get_host_port_id, .show_host_port_id = 1, From 35002a8ec557f679e414dae9dec9d08c66f2791a Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Fri, 15 Nov 2024 18:33:13 +0530 Subject: [PATCH 266/366] scsi: qla2xxx: Update version to 10.02.09.400-k Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20241115130313.46826-8-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index cf0f9d9db6458..a491d6ee5c94c 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -6,9 +6,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "10.02.09.300-k" +#define QLA2XXX_VERSION "10.02.09.400-k" #define QLA_DRIVER_MAJOR_VER 10 #define QLA_DRIVER_MINOR_VER 2 #define QLA_DRIVER_PATCH_VER 9 -#define QLA_DRIVER_BETA_VER 300 +#define QLA_DRIVER_BETA_VER 400 From eb48e9fc0028bed94a40a9352d065909f19e333c Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2024 22:25:22 -0800 Subject: [PATCH 267/366] scsi: ufs: core: sysfs: Prevent div by zero Prevent a division by 0 when monitoring is not enabled. Fixes: 1d8613a23f3c ("scsi: ufs: core: Introduce HBA performance monitor sysfs nodes") Cc: stable@vger.kernel.org Signed-off-by: Gwendal Grignou Link: https://lore.kernel.org/r/20241120062522.917157-1-gwendal@chromium.org Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 265f21133b633..796e37a1d859f 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -670,6 +670,9 @@ static ssize_t read_req_latency_avg_show(struct device *dev, struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_hba_monitor *m = &hba->monitor; + if (!m->nr_req[READ]) + return sysfs_emit(buf, "0\n"); + return sysfs_emit(buf, "%llu\n", div_u64(ktime_to_us(m->lat_sum[READ]), m->nr_req[READ])); } @@ -737,6 +740,9 @@ static ssize_t write_req_latency_avg_show(struct device *dev, struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_hba_monitor *m = &hba->monitor; + if (!m->nr_req[WRITE]) + return sysfs_emit(buf, "0\n"); + return sysfs_emit(buf, "%llu\n", div_u64(ktime_to_us(m->lat_sum[WRITE]), m->nr_req[WRITE])); } From f10593ad9bc36921f623361c9e3dd96bd52d85ee Mon Sep 17 00:00:00 2001 From: Suraj Sonawane Date: Wed, 20 Nov 2024 18:29:44 +0530 Subject: [PATCH 268/366] scsi: sg: Fix slab-use-after-free read in sg_release() Fix a use-after-free bug in sg_release(), detected by syzbot with KASAN: BUG: KASAN: slab-use-after-free in lock_release+0x151/0xa30 kernel/locking/lockdep.c:5838 __mutex_unlock_slowpath+0xe2/0x750 kernel/locking/mutex.c:912 sg_release+0x1f4/0x2e0 drivers/scsi/sg.c:407 In sg_release(), the function kref_put(&sfp->f_ref, sg_remove_sfp) is called before releasing the open_rel_lock mutex. The kref_put() call may decrement the reference count of sfp to zero, triggering its cleanup through sg_remove_sfp(). This cleanup includes scheduling deferred work via sg_remove_sfp_usercontext(), which ultimately frees sfp. After kref_put(), sg_release() continues to unlock open_rel_lock and may reference sfp or sdp. If sfp has already been freed, this results in a slab-use-after-free error. Move the kref_put(&sfp->f_ref, sg_remove_sfp) call after unlocking the open_rel_lock mutex. This ensures: - No references to sfp or sdp occur after the reference count is decremented. - Cleanup functions such as sg_remove_sfp() and sg_remove_sfp_usercontext() can safely execute without impacting the mutex handling in sg_release(). The fix has been tested and validated by syzbot. This patch closes the bug reported at the following syzkaller link and ensures proper sequencing of resource cleanup and mutex operations, eliminating the risk of use-after-free errors in sg_release(). Reported-by: syzbot+7efb5850a17ba6ce098b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7efb5850a17ba6ce098b Tested-by: syzbot+7efb5850a17ba6ce098b@syzkaller.appspotmail.com Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling") Signed-off-by: Suraj Sonawane Link: https://lore.kernel.org/r/20241120125944.88095-1-surajsonawane0215@gmail.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 84334ab39c810..94127868bedf8 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -386,7 +386,6 @@ sg_release(struct inode *inode, struct file *filp) SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_release\n")); mutex_lock(&sdp->open_rel_lock); - kref_put(&sfp->f_ref, sg_remove_sfp); sdp->open_cnt--; /* possibly many open()s waiting on exlude clearing, start many; @@ -398,6 +397,7 @@ sg_release(struct inode *inode, struct file *filp) wake_up_interruptible(&sdp->open_wait); } mutex_unlock(&sdp->open_rel_lock); + kref_put(&sfp->f_ref, sg_remove_sfp); return 0; } From 7f45ed5f0cd5ccbbec79adc6c48a67d6a85fba56 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Fri, 22 Nov 2024 10:49:43 +0800 Subject: [PATCH 269/366] scsi: ufs: core: Add missing post notify for power mode change When the power mode change is successful but the power mode hasn't actually changed, the post notification was missed. Similar to the approach with hibernate/clock scale/hce enable, having pre/post notifications in the same function will make it easier to maintain. Additionally, supplement the description of power parameters for the pwr_change_notify callback. Fixes: 7eb584db73be ("ufs: refactor configuring power mode") Cc: stable@vger.kernel.org #6.11.x Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20241122024943.30589-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++--- include/ufs/ufshcd.h | 10 ++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 25c8b3f571717..b7ec5797d5baa 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4671,9 +4671,6 @@ static int ufshcd_change_power_mode(struct ufs_hba *hba, dev_err(hba->dev, "%s: power mode change failed %d\n", __func__, ret); } else { - ufshcd_vops_pwr_change_notify(hba, POST_CHANGE, NULL, - pwr_mode); - memcpy(&hba->pwr_info, pwr_mode, sizeof(struct ufs_pa_layer_attr)); } @@ -4702,6 +4699,10 @@ int ufshcd_config_pwr_mode(struct ufs_hba *hba, ret = ufshcd_change_power_mode(hba, &final_params); + if (!ret) + ufshcd_vops_pwr_change_notify(hba, POST_CHANGE, NULL, + &final_params); + return ret; } EXPORT_SYMBOL_GPL(ufshcd_config_pwr_mode); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d7aca9e61684f..d650ae6b58d39 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -310,7 +310,9 @@ struct ufs_pwr_mode_info { * to allow variant specific Uni-Pro initialization. * @pwr_change_notify: called before and after a power mode change * is carried out to allow vendor spesific capabilities - * to be set. + * to be set. PRE_CHANGE can modify final_params based + * on desired_pwr_mode, but POST_CHANGE must not alter + * the final_params parameter * @setup_xfer_req: called before any transfer request is issued * to set some things * @setup_task_mgmt: called before any task management request is issued @@ -353,9 +355,9 @@ struct ufs_hba_variant_ops { int (*link_startup_notify)(struct ufs_hba *, enum ufs_notify_change_status); int (*pwr_change_notify)(struct ufs_hba *, - enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *, - struct ufs_pa_layer_attr *); + enum ufs_notify_change_status status, + struct ufs_pa_layer_attr *desired_pwr_mode, + struct ufs_pa_layer_attr *final_params); void (*setup_xfer_req)(struct ufs_hba *hba, int tag, bool is_scsi_cmd); void (*setup_task_mgmt)(struct ufs_hba *, int, u8); From b1aee7f034615b6824d2c70ddb37ef9fc23493b7 Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Wed, 27 Nov 2024 13:13:24 -0500 Subject: [PATCH 270/366] scsi: storvsc: Do not flag MAINTENANCE_IN return of SRB_STATUS_DATA_OVERRUN as an error This partially reverts commit 812fe6420a6e ("scsi: storvsc: Handle additional SRB status values"). HyperV does not support MAINTENANCE_IN resulting in FC passthrough returning the SRB_STATUS_DATA_OVERRUN value. Now that SRB_STATUS_DATA_OVERRUN is treated as an error, multipath ALUA paths go into a faulty state as multipath ALUA submits RTPG commands via MAINTENANCE_IN. [ 3.215560] hv_storvsc 1d69d403-9692-4460-89f9-a8cbcc0f94f3: tag#230 cmd 0xa3 status: scsi 0x0 srb 0x12 hv 0xc0000001 [ 3.215572] scsi 1:0:0:32: alua: rtpg failed, result 458752 Make MAINTENANCE_IN return success to avoid the error path as is currently done with INQUIRY and MODE_SENSE. Suggested-by: Michael Kelley Signed-off-by: Cathy Avery Link: https://lore.kernel.org/r/20241127181324.3318443-1-cavery@redhat.com Reviewed-by: Michael Kelley Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7ceb982040a5d..d0b55c1fa908a 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -149,6 +149,8 @@ struct hv_fc_wwn_packet { */ static int vmstor_proto_version; +static bool hv_dev_is_fc(struct hv_device *hv_dev); + #define STORVSC_LOGGING_NONE 0 #define STORVSC_LOGGING_ERROR 1 #define STORVSC_LOGGING_WARN 2 @@ -1138,6 +1140,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, * not correctly handle: * INQUIRY command with page code parameter set to 0x80 * MODE_SENSE command with cmd[2] == 0x1c + * MAINTENANCE_IN is not supported by HyperV FC passthrough * * Setup srb and scsi status so this won't be fatal. * We do this so we can distinguish truly fatal failues @@ -1145,7 +1148,9 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, */ if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) || - (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) { + (stor_pkt->vm_srb.cdb[0] == MODE_SENSE) || + (stor_pkt->vm_srb.cdb[0] == MAINTENANCE_IN && + hv_dev_is_fc(device))) { vstor_packet->vm_srb.scsi_status = 0; vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS; } From 6918141d815acef056a0d10e966a027d869a922d Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 2 Dec 2024 13:00:45 +0000 Subject: [PATCH 271/366] scsi: scsi_debug: Fix hrtimer support for ndelay Since commit 771f712ba5b0 ("scsi: scsi_debug: Fix cmd duration calculation"), ns_from_boot value is only evaluated in schedule_resp() for polled requests. However, ns_from_boot is also required for hrtimer support for when ndelay is less than INCLUSIVE_TIMING_MAX_NS, so fix up the logic to decide when to evaluate ns_from_boot. Fixes: 771f712ba5b0 ("scsi: scsi_debug: Fix cmd duration calculation") Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241202130045.2335194-1-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index b52513eeeafa7..680ba180a6725 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6447,7 +6447,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } sd_dp = &sqcp->sd_dp; - if (polled) + if (polled || (ndelay > 0 && ndelay < INCLUSIVE_TIMING_MAX_NS)) ns_from_boot = ktime_get_boottime_ns(); /* one of the resp_*() response functions is called here */ From ca4b2c4607433033e9c4f4659f809af4261d8992 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 15 Nov 2024 13:15:50 +0100 Subject: [PATCH 272/366] fs/smb/client: avoid querying SMB2_OP_QUERY_WSL_EA for SMB3 POSIX Avoid extra roundtrip Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Ralph Boehme Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index a188908914fe8..a55f0044d30bd 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -943,7 +943,8 @@ int smb2_query_path_info(const unsigned int xid, if (rc || !data->reparse_point) goto out; - cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; + if (!tcon->posix_extensions) + cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; /* * Skip SMB2_OP_GET_REPARSE if symlink already parsed in create * response. From c9a4b55431e5220347881e148725bed69c84e037 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 12 Nov 2024 21:07:00 -0500 Subject: [PATCH 273/366] x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation Under some conditions, MONITOR wakeups on Lunar Lake processors can be lost, resulting in significant user-visible delays. Add Lunar Lake to X86_BUG_MONITOR so that wake_up_idle_cpu() always sends an IPI, avoiding this potential delay. Reported originally here: https://bugzilla.kernel.org/show_bug.cgi?id=219364 [ dhansen: tweak subject ] Signed-off-by: Len Brown Signed-off-by: Dave Hansen Reviewed-by: Rafael J. Wysocki Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/a4aa8842a3c3bfdb7fe9807710eef159cbf0e705.1731463305.git.len.brown%40intel.com --- arch/x86/kernel/cpu/intel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d1de300af1737..8ded9f859a3a9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) + if (boot_cpu_has(X86_FEATURE_MWAIT) && + (c->x86_vfm == INTEL_ATOM_GOLDMONT || + c->x86_vfm == INTEL_LUNARLAKE_M)) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 From 7ffc7481153bbabf3332c6a19b289730c7e1edf5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Dec 2024 00:04:49 +0100 Subject: [PATCH 274/366] netfilter: nft_set_hash: skip duplicated elements pending gc run rhashtable does not provide stable walk, duplicated elements are possible in case of resizing. I considered that checking for errors when calling rhashtable_walk_next() was sufficient to detect the resizing. However, rhashtable_walk_next() returns -EAGAIN only at the end of the iteration, which is too late, because a gc work containing duplicated elements could have been already scheduled for removal to the worker. Add a u32 gc worker sequence number per set, bump it on every workqueue run. Annotate gc worker sequence number on the expired element. Use it to skip those already seen in this gc workqueue run. Note that this new field is never reset in case gc transaction fails, so next gc worker run on the expired element overrides it. Wraparound of gc worker sequence number should not be an issue with stale gc worker sequence number in the element, that would just postpone the element removal in one gc run. Note that it is not possible to use flags to annotate that element is pending gc run to detect duplicates, given that gc transaction can be invalidated in case of update from the control plane, therefore, not allowing to clear such flag. On x86_64, pahole reports no changes in the size of nft_rhash_elem. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Laurent Fasnacht Tested-by: Laurent Fasnacht Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 65bd291318f2a..8bfac4185ac79 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -24,11 +24,13 @@ struct nft_rhash { struct rhashtable ht; struct delayed_work gc_work; + u32 wq_gc_seq; }; struct nft_rhash_elem { struct nft_elem_priv priv; struct rhash_head node; + u32 wq_gc_seq; struct nft_set_ext ext; }; @@ -338,6 +340,10 @@ static void nft_rhash_gc(struct work_struct *work) if (!gc) goto done; + /* Elements never collected use a zero gc worker sequence number. */ + if (unlikely(++priv->wq_gc_seq == 0)) + priv->wq_gc_seq++; + rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); @@ -355,6 +361,14 @@ static void nft_rhash_gc(struct work_struct *work) goto try_later; } + /* rhashtable walk is unstable, already seen in this gc run? + * Then, skip this element. In case of (unlikely) sequence + * wraparound and stale element wq_gc_seq, next gc run will + * just find this expired element. + */ + if (he->wq_gc_seq == priv->wq_gc_seq) + continue; + if (nft_set_elem_is_dead(&he->ext)) goto dead_elem; @@ -371,6 +385,8 @@ static void nft_rhash_gc(struct work_struct *work) if (!gc) goto try_later; + /* annotate gc sequence for this attempt. */ + he->wq_gc_seq = priv->wq_gc_seq; nft_trans_gc_elem_add(gc, he); } From 6a832bc8bbb22350f7ffe6ecb2d36f261bb96023 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 15 Nov 2024 19:21:04 +0100 Subject: [PATCH 275/366] fs/smb/client: Implement new SMB3 POSIX type Fixes special files against current Samba. On the Samba server: insgesamt 20 131958 brw-r--r-- 1 root root 0, 0 15. Nov 12:04 blockdev 131965 crw-r--r-- 1 root root 1, 1 15. Nov 12:04 chardev 131966 prw-r--r-- 1 samba samba 0 15. Nov 12:05 fifo 131953 -rw-rwxrw-+ 2 samba samba 4 18. Nov 11:37 file 131953 -rw-rwxrw-+ 2 samba samba 4 18. Nov 11:37 hardlink 131957 lrwxrwxrwx 1 samba samba 4 15. Nov 12:03 symlink -> file 131954 -rwxrwxr-x+ 1 samba samba 0 18. Nov 15:28 symlinkoversmb Before: ls: cannot access '/mnt/smb3unix/posix/blockdev': No data available ls: cannot access '/mnt/smb3unix/posix/chardev': No data available ls: cannot access '/mnt/smb3unix/posix/symlinkoversmb': No data available ls: cannot access '/mnt/smb3unix/posix/fifo': No data available ls: cannot access '/mnt/smb3unix/posix/symlink': No data available total 16 ? -????????? ? ? ? ? ? blockdev ? -????????? ? ? ? ? ? chardev ? -????????? ? ? ? ? ? fifo 131953 -rw-rwxrw- 2 root samba 4 Nov 18 11:37 file 131953 -rw-rwxrw- 2 root samba 4 Nov 18 11:37 hardlink ? -????????? ? ? ? ? ? symlink ? -????????? ? ? ? ? ? symlinkoversmb After: insgesamt 21 131958 brw-r--r-- 1 root root 0, 0 15. Nov 12:04 blockdev 131965 crw-r--r-- 1 root root 1, 1 15. Nov 12:04 chardev 131966 prw-r--r-- 1 root samba 0 15. Nov 12:05 fifo 131953 -rw-rwxrw- 2 root samba 4 18. Nov 11:37 file 131953 -rw-rwxrw- 2 root samba 4 18. Nov 11:37 hardlink 131957 lrwxrwxrwx 1 root samba 4 15. Nov 12:03 symlink -> file 131954 lrwxrwxr-x 1 root samba 23 18. Nov 15:28 symlinkoversmb -> mnt/smb3unix/posix/file Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Ralph Boehme Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 1 + fs/smb/client/inode.c | 89 +++++++++++++++++++++++++++++++++++---- fs/smb/client/readdir.c | 35 +++++++-------- fs/smb/client/reparse.c | 84 ++++++++++++++++++++++-------------- 4 files changed, 149 insertions(+), 60 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index bbaaf16af20fd..90e3297ea8479 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -669,6 +669,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, int cifs_sfu_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); +umode_t wire_mode_to_posix(u32 wire); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 42c030687918d..ef913d65c67f2 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -746,6 +746,84 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, #endif } +#define POSIX_TYPE_FILE 0 +#define POSIX_TYPE_DIR 1 +#define POSIX_TYPE_SYMLINK 2 +#define POSIX_TYPE_CHARDEV 3 +#define POSIX_TYPE_BLKDEV 4 +#define POSIX_TYPE_FIFO 5 +#define POSIX_TYPE_SOCKET 6 + +#define POSIX_X_OTH 0000001 +#define POSIX_W_OTH 0000002 +#define POSIX_R_OTH 0000004 +#define POSIX_X_GRP 0000010 +#define POSIX_W_GRP 0000020 +#define POSIX_R_GRP 0000040 +#define POSIX_X_USR 0000100 +#define POSIX_W_USR 0000200 +#define POSIX_R_USR 0000400 +#define POSIX_STICKY 0001000 +#define POSIX_SET_GID 0002000 +#define POSIX_SET_UID 0004000 + +#define POSIX_OTH_MASK 0000007 +#define POSIX_GRP_MASK 0000070 +#define POSIX_USR_MASK 0000700 +#define POSIX_PERM_MASK 0000777 +#define POSIX_FILETYPE_MASK 0070000 + +#define POSIX_FILETYPE_SHIFT 12 + +static u32 wire_perms_to_posix(u32 wire) +{ + u32 mode = 0; + + mode |= (wire & POSIX_X_OTH) ? S_IXOTH : 0; + mode |= (wire & POSIX_W_OTH) ? S_IWOTH : 0; + mode |= (wire & POSIX_R_OTH) ? S_IROTH : 0; + mode |= (wire & POSIX_X_GRP) ? S_IXGRP : 0; + mode |= (wire & POSIX_W_GRP) ? S_IWGRP : 0; + mode |= (wire & POSIX_R_GRP) ? S_IRGRP : 0; + mode |= (wire & POSIX_X_USR) ? S_IXUSR : 0; + mode |= (wire & POSIX_W_USR) ? S_IWUSR : 0; + mode |= (wire & POSIX_R_USR) ? S_IRUSR : 0; + mode |= (wire & POSIX_STICKY) ? S_ISVTX : 0; + mode |= (wire & POSIX_SET_GID) ? S_ISGID : 0; + mode |= (wire & POSIX_SET_UID) ? S_ISUID : 0; + + return mode; +} + +static u32 posix_filetypes[] = { + S_IFREG, + S_IFDIR, + S_IFLNK, + S_IFCHR, + S_IFBLK, + S_IFIFO, + S_IFSOCK +}; + +static u32 wire_filetype_to_posix(u32 wire_type) +{ + if (wire_type >= ARRAY_SIZE(posix_filetypes)) { + pr_warn("Unexpected type %u", wire_type); + return 0; + } + return posix_filetypes[wire_type]; +} + +umode_t wire_mode_to_posix(u32 wire) +{ + u32 wire_type; + u32 mode; + + wire_type = (wire & POSIX_FILETYPE_MASK) >> POSIX_FILETYPE_SHIFT; + mode = (wire_perms_to_posix(wire) | wire_filetype_to_posix(wire_type)); + return (umode_t)mode; +} + /* Fill a cifs_fattr struct with info from POSIX info struct */ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data, @@ -782,20 +860,13 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_bytes = le64_to_cpu(info->AllocationSize); fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->HardLinks); - fattr->cf_mode = (umode_t) le32_to_cpu(info->Mode); + fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode)); if (cifs_open_data_reparse(data) && cifs_reparse_point_to_fattr(cifs_sb, fattr, data)) goto out_reparse; - fattr->cf_mode &= ~S_IFMT; - if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { - fattr->cf_mode |= S_IFDIR; - fattr->cf_dtype = DT_DIR; - } else { /* file */ - fattr->cf_mode |= S_IFREG; - fattr->cf_dtype = DT_REG; - } + fattr->cf_dtype = S_DT(fattr->cf_mode); out_reparse: if (S_ISLNK(fattr->cf_mode)) { diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index b3a8f9c6fcff6..f878bcdbd3198 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -241,31 +241,28 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, fattr->cf_nlink = le32_to_cpu(info->HardLinks); fattr->cf_cifsattrs = le32_to_cpu(info->DosAttributes); - /* - * Since we set the inode type below we need to mask off - * to avoid strange results if bits set above. - * XXX: why not make server&client use the type bits? - */ - fattr->cf_mode = le32_to_cpu(info->Mode) & ~S_IFMT; + if (fattr->cf_cifsattrs & ATTR_REPARSE) + fattr->cf_cifstag = le32_to_cpu(info->ReparseTag); + + /* The Mode field in the response can now include the file type as well */ + fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode)); + fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); + + switch (fattr->cf_mode & S_IFMT) { + case S_IFLNK: + case S_IFBLK: + case S_IFCHR: + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + break; + default: + break; + } cifs_dbg(FYI, "posix fattr: dev %d, reparse %d, mode %o\n", le32_to_cpu(info->DeviceId), le32_to_cpu(info->ReparseTag), le32_to_cpu(info->Mode)); - if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { - fattr->cf_mode |= S_IFDIR; - fattr->cf_dtype = DT_DIR; - } else { - /* - * mark anything that is not a dir as regular - * file. special files should have the REPARSE - * attribute and will be marked as needing revaluation - */ - fattr->cf_mode |= S_IFREG; - fattr->cf_dtype = DT_REG; - } - sid_to_id(cifs_sb, &parsed.owner, fattr, SIDOWNER); sid_to_id(cifs_sb, &parsed.group, fattr, SIDGROUP); } diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index e81d2d78ddb7c..50b1aba200089 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -803,44 +803,60 @@ static void wsl_to_fattr(struct cifs_open_info_data *data, fattr->cf_dtype = S_DT(fattr->cf_mode); } -bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, - struct cifs_fattr *fattr, - struct cifs_open_info_data *data) +static bool posix_reparse_to_fattr(struct cifs_sb_info *cifs_sb, + struct cifs_fattr *fattr, + struct cifs_open_info_data *data) { struct reparse_posix_data *buf = data->reparse.posix; - u32 tag = data->reparse.tag; - if (tag == IO_REPARSE_TAG_NFS && buf) { - if (le16_to_cpu(buf->ReparseDataLength) < sizeof(buf->InodeType)) + + if (buf == NULL) + return true; + + if (le16_to_cpu(buf->ReparseDataLength) < sizeof(buf->InodeType)) { + WARN_ON_ONCE(1); + return false; + } + + switch (le64_to_cpu(buf->InodeType)) { + case NFS_SPECFILE_CHR: + if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) { + WARN_ON_ONCE(1); return false; - switch (le64_to_cpu(buf->InodeType)) { - case NFS_SPECFILE_CHR: - if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) - return false; - fattr->cf_mode |= S_IFCHR; - fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); - break; - case NFS_SPECFILE_BLK: - if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) - return false; - fattr->cf_mode |= S_IFBLK; - fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); - break; - case NFS_SPECFILE_FIFO: - fattr->cf_mode |= S_IFIFO; - break; - case NFS_SPECFILE_SOCK: - fattr->cf_mode |= S_IFSOCK; - break; - case NFS_SPECFILE_LNK: - fattr->cf_mode |= S_IFLNK; - break; - default: + } + fattr->cf_mode |= S_IFCHR; + fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); + break; + case NFS_SPECFILE_BLK: + if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) { WARN_ON_ONCE(1); return false; } - goto out; + fattr->cf_mode |= S_IFBLK; + fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); + break; + case NFS_SPECFILE_FIFO: + fattr->cf_mode |= S_IFIFO; + break; + case NFS_SPECFILE_SOCK: + fattr->cf_mode |= S_IFSOCK; + break; + case NFS_SPECFILE_LNK: + fattr->cf_mode |= S_IFLNK; + break; + default: + WARN_ON_ONCE(1); + return false; } + return true; +} + +bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, + struct cifs_fattr *fattr, + struct cifs_open_info_data *data) +{ + u32 tag = data->reparse.tag; + bool ok; switch (tag) { case IO_REPARSE_TAG_INTERNAL: @@ -860,15 +876,19 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, case IO_REPARSE_TAG_LX_BLK: wsl_to_fattr(data, cifs_sb, tag, fattr); break; + case IO_REPARSE_TAG_NFS: + ok = posix_reparse_to_fattr(cifs_sb, fattr, data); + if (!ok) + return false; + break; case 0: /* SMB1 symlink */ case IO_REPARSE_TAG_SYMLINK: - case IO_REPARSE_TAG_NFS: fattr->cf_mode |= S_IFLNK; break; default: return false; } -out: + fattr->cf_dtype = S_DT(fattr->cf_mode); return true; } From 8cb0bc5436351de8a11eef13b7367d64cc0d6c68 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Mon, 25 Nov 2024 16:19:56 +0100 Subject: [PATCH 276/366] fs/smb/client: cifs_prime_dcache() for SMB3 POSIX reparse points Spares an extra revalidation request Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Ralph Boehme Signed-off-by: Steve French --- fs/smb/client/readdir.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index f878bcdbd3198..b906bf78af1ec 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -71,6 +71,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; + bool reparse_need_reval = false; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); int rc; @@ -85,7 +87,21 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, * this spares us an invalidation. */ retry: - if ((fattr->cf_cifsattrs & ATTR_REPARSE) || + if (posix) { + switch (fattr->cf_mode & S_IFMT) { + case S_IFLNK: + case S_IFBLK: + case S_IFCHR: + reparse_need_reval = true; + break; + default: + break; + } + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + reparse_need_reval = true; + } + + if (reparse_need_reval || (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)) return; From 06a025448b572c3bd78dd23a31488a0907cd9512 Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Sat, 30 Nov 2024 16:56:14 +0100 Subject: [PATCH 277/366] ksmbd: align aux_payload_buf to avoid OOB reads in cryptographic operations The aux_payload_buf allocation in SMB2 read is performed without ensuring alignment, which could result in out-of-bounds (OOB) reads during cryptographic operations such as crypto_xor or ghash. This patch aligns the allocation of aux_payload_buf to prevent these issues. (Note that to add this patch to stable would require modifications due to recent patch "ksmbd: use __GFP_RETRY_MAYFAIL") Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 23879555880fc..4f539eeadbb0d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6680,7 +6680,7 @@ int smb2_read(struct ksmbd_work *work) ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n", fp->filp, offset, length); - aux_payload_buf = kvzalloc(length, KSMBD_DEFAULT_GFP); + aux_payload_buf = kvzalloc(ALIGN(length, 8), KSMBD_DEFAULT_GFP); if (!aux_payload_buf) { err = -ENOMEM; goto out; From 50b94204446e1215af081fd713d7d566d9258e35 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 3 Dec 2024 10:48:15 +0100 Subject: [PATCH 278/366] ipmr: tune the ipmr_can_free_table() checks. Eric reported a syzkaller-triggered splat caused by recent ipmr changes: WARNING: CPU: 2 PID: 6041 at net/ipv6/ip6mr.c:419 ip6mr_free_table+0xbd/0x120 net/ipv6/ip6mr.c:419 Modules linked in: CPU: 2 UID: 0 PID: 6041 Comm: syz-executor183 Not tainted 6.12.0-syzkaller-10681-g65ae975e97d5 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:ip6mr_free_table+0xbd/0x120 net/ipv6/ip6mr.c:419 Code: 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 58 49 83 bc 24 c0 0e 00 00 00 74 09 e8 44 ef a9 f7 90 <0f> 0b 90 e8 3b ef a9 f7 48 8d 7b 38 e8 12 a3 96 f7 48 89 df be 0f RSP: 0018:ffffc90004267bd8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88803c710000 RCX: ffffffff89e4d844 RDX: ffff88803c52c880 RSI: ffffffff89e4d87c RDI: ffff88803c578ec0 RBP: 0000000000000001 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88803c578000 R13: ffff88803c710000 R14: ffff88803c710008 R15: dead000000000100 FS: 00007f7a855ee6c0(0000) GS:ffff88806a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7a85689938 CR3: 000000003c492000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6mr_rules_exit+0x176/0x2d0 net/ipv6/ip6mr.c:283 ip6mr_net_exit_batch+0x53/0xa0 net/ipv6/ip6mr.c:1388 ops_exit_list+0x128/0x180 net/core/net_namespace.c:177 setup_net+0x4fe/0x860 net/core/net_namespace.c:394 copy_net_ns+0x2b4/0x6b0 net/core/net_namespace.c:500 create_new_namespaces+0x3ea/0xad0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228 ksys_unshare+0x45d/0xa40 kernel/fork.c:3334 __do_sys_unshare kernel/fork.c:3405 [inline] __se_sys_unshare kernel/fork.c:3403 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3403 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f7a856332d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f7a855ee238 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f7a856bd308 RCX: 00007f7a856332d9 RDX: 00007f7a8560f8c6 RSI: 0000000000000000 RDI: 0000000062040200 RBP: 00007f7a856bd300 R08: 00007fff932160a7 R09: 00007f7a855ee6c0 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f7a856bd30c R13: 0000000000000000 R14: 00007fff93215fc0 R15: 00007fff932160a8 The root cause is a network namespace creation failing after successful initialization of the ipmr subsystem. Such a case is not currently matched by the ipmr_can_free_table() helper. New namespaces are zeroed on allocation and inserted into net ns list only after successful creation; when deleting an ipmr table, the list next pointer can be NULL only on netns initialization failure. Update the ipmr_can_free_table() checks leveraging such condition. Reported-by: Eric Dumazet Reported-by: syzbot+6e8cb445d4b43d006e0c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6e8cb445d4b43d006e0c Fixes: 11b6e701bce9 ("ipmr: add debug check for mr table cleanup") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/8bde975e21bbca9d9c27e36209b2dd4f1d7a3f00.1733212078.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 5 +++++ net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 873c0f9fdac66..f943bd79ef038 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -325,6 +325,11 @@ static inline int check_net(const struct net *net) #define net_drop_ns NULL #endif +/* Returns true if the netns initialization is completed successfully */ +static inline bool net_initialized(const struct net *net) +{ + return READ_ONCE(net->list.next); +} static inline void __netns_tracker_alloc(struct net *net, netns_tracker *tracker, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c5b8ec5c0a8c0..99d8faa508e53 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -122,7 +122,7 @@ static void ipmr_expire_process(struct timer_list *t); static bool ipmr_can_free_table(struct net *net) { - return !check_net(net) || !net->ipv4.mr_rules_ops; + return !check_net(net) || !net_initialized(net); } static struct mr_table *ipmr_mr_table_iter(struct net *net, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7f1902ac35862..578ff1336afef 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -110,7 +110,7 @@ static void ipmr_expire_process(struct timer_list *t); static bool ip6mr_can_free_table(struct net *net) { - return !check_net(net) || !net->ipv6.mr6_rules_ops; + return !check_net(net) || !net_initialized(net); } static struct mr_table *ip6mr_mr_table_iter(struct net *net, From 910c4788d6155b2202ec88273376cd7ecdc24f0a Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 2 Dec 2024 16:33:57 +0100 Subject: [PATCH 279/366] ethtool: Fix wrong mod state in case of verbose and no_mask bitset A bitset without mask in a _SET request means we want exactly the bits in the bitset to be set. This works correctly for compact format but when verbose format is parsed, ethnl_update_bitset32_verbose() only sets the bits present in the request bitset but does not clear the rest. The commit 6699170376ab ("ethtool: fix application of verbose no_mask bitset") fixes this issue by clearing the whole target bitmap before we start iterating. The solution proposed brought an issue with the behavior of the mod variable. As the bitset is always cleared the old value will always differ to the new value. Fix it by adding a new function to compare bitmaps and a temporary variable which save the state of the old bitmap. Fixes: 6699170376ab ("ethtool: fix application of verbose no_mask bitset") Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20241202153358.1142095-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/bitset.c | 48 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 0515d6604b3b9..f0883357d12e5 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -425,12 +425,32 @@ static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits, return 0; } +/** + * ethnl_bitmap32_equal() - Compare two bitmaps + * @map1: first bitmap + * @map2: second bitmap + * @nbits: bit size to compare + * + * Return: true if first @nbits are equal, false if not + */ +static bool ethnl_bitmap32_equal(const u32 *map1, const u32 *map2, + unsigned int nbits) +{ + if (memcmp(map1, map2, nbits / 32 * sizeof(u32))) + return false; + if (nbits % 32 == 0) + return true; + return !((map1[nbits / 32] ^ map2[nbits / 32]) & + ethnl_lower_bits(nbits % 32)); +} + static int ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, struct nlattr **tb, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { + u32 *saved_bitmap = NULL; struct nlattr *bit_attr; bool no_mask; int rem; @@ -448,8 +468,20 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, } no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; - if (no_mask) - ethnl_bitmap32_clear(bitmap, 0, nbits, mod); + if (no_mask) { + unsigned int nwords = DIV_ROUND_UP(nbits, 32); + unsigned int nbytes = nwords * sizeof(u32); + bool dummy; + + /* The bitmap size is only the size of the map part without + * its mask part. + */ + saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL); + if (!saved_bitmap) + return -ENOMEM; + memcpy(saved_bitmap, bitmap, nbytes); + ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy); + } nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; @@ -458,22 +490,30 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS"); + kfree(saved_bitmap); return -EINVAL; } ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask, names, extack); - if (ret < 0) + if (ret < 0) { + kfree(saved_bitmap); return ret; + } old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32)); if (new_val != old_val) { if (new_val) bitmap[idx / 32] |= ((u32)1 << (idx % 32)); else bitmap[idx / 32] &= ~((u32)1 << (idx % 32)); - *mod = true; + if (!no_mask) + *mod = true; } } + if (no_mask && !ethnl_bitmap32_equal(saved_bitmap, bitmap, nbits)) + *mod = true; + + kfree(saved_bitmap); return 0; } From 217bbf156f93ada86b91617489e7ba8a0904233c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 3 Dec 2024 16:16:05 +0100 Subject: [PATCH 280/366] mlxsw: spectrum_acl_flex_keys: Use correct key block on Spectrum-4 The driver is currently using an ACL key block that is not supported by Spectrum-4. This works because the driver is only using a single field from this key block which is located in the same offset in the equivalent Spectrum-4 key block. The issue was discovered when the firmware started rejecting the use of the unsupported key block. The change has been reverted to avoid breaking users that only update their firmware. Nonetheless, fix the issue by using the correct key block. Fixes: 07ff135958dd ("mlxsw: Introduce flex key elements for Spectrum-4") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://patch.msgid.link/35e72c97bdd3bc414fb8e4d747e5fb5d26c29658.1733237440.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 6fe185ea6732c..1850a975b3804 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -324,6 +324,10 @@ static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 9, -1, true), /* RX_ACL_SYSTEM_PORT */ }; +static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_1b[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4), +}; + static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_5b[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER, 0x04, 20, 12), }; @@ -341,7 +345,7 @@ static const struct mlxsw_afk_block mlxsw_sp4_afk_blocks[] = { MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4), MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x1A, mlxsw_sp_afk_element_info_mac_5b), MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x38, mlxsw_sp_afk_element_info_ipv4_0), - MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x39, mlxsw_sp_afk_element_info_ipv4_1), + MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x3F, mlxsw_sp_afk_element_info_ipv4_1b), MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2), MLXSW_AFK_BLOCK(0x36, mlxsw_sp_afk_element_info_ipv4_5b), MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0), From 8588c99c7d47448fcae39e3227d6e2bb97aad86d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Dec 2024 18:21:21 +0000 Subject: [PATCH 281/366] geneve: do not assume mac header is set in geneve_xmit_skb() We should not assume mac header is set in output path. Use skb_eth_hdr() instead of eth_hdr() to fix the issue. sysbot reported the following : WARNING: CPU: 0 PID: 11635 at include/linux/skbuff.h:3052 skb_mac_header include/linux/skbuff.h:3052 [inline] WARNING: CPU: 0 PID: 11635 at include/linux/skbuff.h:3052 eth_hdr include/linux/if_ether.h:24 [inline] WARNING: CPU: 0 PID: 11635 at include/linux/skbuff.h:3052 geneve_xmit_skb drivers/net/geneve.c:898 [inline] WARNING: CPU: 0 PID: 11635 at include/linux/skbuff.h:3052 geneve_xmit+0x4c38/0x5730 drivers/net/geneve.c:1039 Modules linked in: CPU: 0 UID: 0 PID: 11635 Comm: syz.4.1423 Not tainted 6.12.0-syzkaller-10296-gaaf20f870da0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:skb_mac_header include/linux/skbuff.h:3052 [inline] RIP: 0010:eth_hdr include/linux/if_ether.h:24 [inline] RIP: 0010:geneve_xmit_skb drivers/net/geneve.c:898 [inline] RIP: 0010:geneve_xmit+0x4c38/0x5730 drivers/net/geneve.c:1039 Code: 21 c6 02 e9 35 d4 ff ff e8 a5 48 4c fb 90 0f 0b 90 e9 fd f5 ff ff e8 97 48 4c fb 90 0f 0b 90 e9 d8 f5 ff ff e8 89 48 4c fb 90 <0f> 0b 90 e9 41 e4 ff ff e8 7b 48 4c fb 90 0f 0b 90 e9 cd e7 ff ff RSP: 0018:ffffc90003b2f870 EFLAGS: 00010283 RAX: 000000000000037a RBX: 000000000000ffff RCX: ffffc9000dc3d000 RDX: 0000000000080000 RSI: ffffffff86428417 RDI: 0000000000000003 RBP: ffffc90003b2f9f0 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff88806603c000 R13: 0000000000000000 R14: ffff8880685b2780 R15: 0000000000000e23 FS: 00007fdc2deed6c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b30a1dff8 CR3: 0000000056b8c000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __netdev_start_xmit include/linux/netdevice.h:5002 [inline] netdev_start_xmit include/linux/netdevice.h:5011 [inline] __dev_direct_xmit+0x58a/0x720 net/core/dev.c:4490 dev_direct_xmit include/linux/netdevice.h:3181 [inline] packet_xmit+0x1e4/0x360 net/packet/af_packet.c:285 packet_snd net/packet/af_packet.c:3146 [inline] packet_sendmsg+0x2700/0x5660 net/packet/af_packet.c:3178 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg net/socket.c:726 [inline] __sys_sendto+0x488/0x4f0 net/socket.c:2197 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xe0/0x1c0 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour") Reported-by: syzbot+3ec5271486d7cb2d242a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/674f4b72.050a0220.17bd51.004a.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Stefano Brivio Link: https://patch.msgid.link/20241203182122.2725517-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2f29b1386b1c8..bc658bc608854 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -895,7 +895,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (geneve->cfg.df == GENEVE_DF_SET) { df = htons(IP_DF); } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = skb_eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_IPV6) { df = htons(IP_DF); From 5883a3e0babf55d85422fddec3422f211c853f6e Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 3 Dec 2024 20:10:20 -0800 Subject: [PATCH 282/366] bnxt_en: refactor tpa_info alloc/free into helpers Refactor bnxt_rx_ring_info->tpa_info operations into helpers that work on a single tpa_info in prep for queue API using them. There are 2 pairs of operations: * bnxt_alloc_one_tpa_info() * bnxt_free_one_tpa_info() These alloc/free the tpa_info array itself. * bnxt_alloc_one_tpa_info_data() * bnxt_free_one_tpa_info_data() These alloc/free the frags stored in tpa_info array. Reviewed-by: Somnath Kotur Signed-off-by: David Wei Reviewed-by: Michael Chan Link: https://patch.msgid.link/20241204041022.56512-2-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 142 ++++++++++++++-------- 1 file changed, 90 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4ec4934a4eddd..b85f22a4d1c36 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3421,15 +3421,11 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info } } -static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) +static void bnxt_free_one_tpa_info_data(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) { - struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; - struct bnxt_tpa_idx_map *map; int i; - if (!rxr->rx_tpa) - goto skip_rx_tpa_free; - for (i = 0; i < bp->max_tpa; i++) { struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[i]; u8 *data = tpa_info->data; @@ -3440,6 +3436,17 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) tpa_info->data = NULL; page_pool_free_va(rxr->head_pool, data, false); } +} + +static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + struct bnxt_tpa_idx_map *map; + + if (!rxr->rx_tpa) + goto skip_rx_tpa_free; + + bnxt_free_one_tpa_info_data(bp, rxr); skip_rx_tpa_free: if (!rxr->rx_buf_ring) @@ -3467,7 +3474,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) return; for (i = 0; i < bp->rx_nr_rings; i++) - bnxt_free_one_rx_ring_skbs(bp, i); + bnxt_free_one_rx_ring_skbs(bp, &bp->rx_ring[i]); } static void bnxt_free_skbs(struct bnxt *bp) @@ -3608,29 +3615,64 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) return 0; } +static void bnxt_free_one_tpa_info(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + int i; + + kfree(rxr->rx_tpa_idx_map); + rxr->rx_tpa_idx_map = NULL; + if (rxr->rx_tpa) { + for (i = 0; i < bp->max_tpa; i++) { + kfree(rxr->rx_tpa[i].agg_arr); + rxr->rx_tpa[i].agg_arr = NULL; + } + } + kfree(rxr->rx_tpa); + rxr->rx_tpa = NULL; +} + static void bnxt_free_tpa_info(struct bnxt *bp) { - int i, j; + int i; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; - kfree(rxr->rx_tpa_idx_map); - rxr->rx_tpa_idx_map = NULL; - if (rxr->rx_tpa) { - for (j = 0; j < bp->max_tpa; j++) { - kfree(rxr->rx_tpa[j].agg_arr); - rxr->rx_tpa[j].agg_arr = NULL; - } - } - kfree(rxr->rx_tpa); - rxr->rx_tpa = NULL; + bnxt_free_one_tpa_info(bp, rxr); } } +static int bnxt_alloc_one_tpa_info(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + struct rx_agg_cmp *agg; + int i; + + rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info), + GFP_KERNEL); + if (!rxr->rx_tpa) + return -ENOMEM; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + return 0; + for (i = 0; i < bp->max_tpa; i++) { + agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); + if (!agg) + return -ENOMEM; + rxr->rx_tpa[i].agg_arr = agg; + } + rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), + GFP_KERNEL); + if (!rxr->rx_tpa_idx_map) + return -ENOMEM; + + return 0; +} + static int bnxt_alloc_tpa_info(struct bnxt *bp) { - int i, j; + int i, rc; bp->max_tpa = MAX_TPA; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { @@ -3641,25 +3683,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; - struct rx_agg_cmp *agg; - - rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info), - GFP_KERNEL); - if (!rxr->rx_tpa) - return -ENOMEM; - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) - continue; - for (j = 0; j < bp->max_tpa; j++) { - agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); - if (!agg) - return -ENOMEM; - rxr->rx_tpa[j].agg_arr = agg; - } - rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), - GFP_KERNEL); - if (!rxr->rx_tpa_idx_map) - return -ENOMEM; + rc = bnxt_alloc_one_tpa_info(bp, rxr); + if (rc) + return rc; } return 0; } @@ -4268,10 +4295,31 @@ static void bnxt_alloc_one_rx_ring_page(struct bnxt *bp, rxr->rx_agg_prod = prod; } +static int bnxt_alloc_one_tpa_info_data(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + dma_addr_t mapping; + u8 *data; + int i; + + for (i = 0; i < bp->max_tpa; i++) { + data = __bnxt_alloc_rx_frag(bp, &mapping, rxr, + GFP_KERNEL); + if (!data) + return -ENOMEM; + + rxr->rx_tpa[i].data = data; + rxr->rx_tpa[i].data_ptr = data + bp->rx_offset; + rxr->rx_tpa[i].mapping = mapping; + } + + return 0; +} + static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; - int i; + int rc; bnxt_alloc_one_rx_ring_skb(bp, rxr, ring_nr); @@ -4281,19 +4329,9 @@ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr) bnxt_alloc_one_rx_ring_page(bp, rxr, ring_nr); if (rxr->rx_tpa) { - dma_addr_t mapping; - u8 *data; - - for (i = 0; i < bp->max_tpa; i++) { - data = __bnxt_alloc_rx_frag(bp, &mapping, rxr, - GFP_KERNEL); - if (!data) - return -ENOMEM; - - rxr->rx_tpa[i].data = data; - rxr->rx_tpa[i].data_ptr = data + bp->rx_offset; - rxr->rx_tpa[i].mapping = mapping; - } + rc = bnxt_alloc_one_tpa_info_data(bp, rxr); + if (rc) + return rc; } return 0; } @@ -13663,7 +13701,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) bnxt_reset_task(bp, true); break; } - bnxt_free_one_rx_ring_skbs(bp, i); + bnxt_free_one_rx_ring_skbs(bp, rxr); rxr->rx_prod = 0; rxr->rx_agg_prod = 0; rxr->rx_sw_agg_prod = 0; From bf1782d70ddd9f23911824e41df8d9b5244f2f30 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 3 Dec 2024 20:10:21 -0800 Subject: [PATCH 283/366] bnxt_en: refactor bnxt_alloc_rx_rings() to call bnxt_alloc_rx_agg_bmap() Refactor bnxt_alloc_rx_rings() to call bnxt_alloc_rx_agg_bmap() for allocating rx_agg_bmap. Reviewed-by: Somnath Kotur Signed-off-by: David Wei Reviewed-by: Michael Chan Link: https://patch.msgid.link/20241204041022.56512-3-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++++++++++------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b85f22a4d1c36..8031ff31f8377 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3764,6 +3764,19 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, return PTR_ERR(pool); } +static int bnxt_alloc_rx_agg_bmap(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) +{ + u16 mem_size; + + rxr->rx_agg_bmap_size = bp->rx_agg_ring_mask + 1; + mem_size = rxr->rx_agg_bmap_size / 8; + rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); + if (!rxr->rx_agg_bmap) + return -ENOMEM; + + return 0; +} + static int bnxt_alloc_rx_rings(struct bnxt *bp) { int numa_node = dev_to_node(&bp->pdev->dev); @@ -3808,19 +3821,15 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) ring->grp_idx = i; if (agg_rings) { - u16 mem_size; - ring = &rxr->rx_agg_ring_struct; rc = bnxt_alloc_ring(bp, &ring->ring_mem); if (rc) return rc; ring->grp_idx = i; - rxr->rx_agg_bmap_size = bp->rx_agg_ring_mask + 1; - mem_size = rxr->rx_agg_bmap_size / 8; - rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); - if (!rxr->rx_agg_bmap) - return -ENOMEM; + rc = bnxt_alloc_rx_agg_bmap(bp, rxr); + if (rc) + return rc; } } if (bp->flags & BNXT_FLAG_TPA) @@ -15331,19 +15340,6 @@ static const struct netdev_stat_ops bnxt_stat_ops = { .get_base_stats = bnxt_get_base_stats, }; -static int bnxt_alloc_rx_agg_bmap(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) -{ - u16 mem_size; - - rxr->rx_agg_bmap_size = bp->rx_agg_ring_mask + 1; - mem_size = rxr->rx_agg_bmap_size / 8; - rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); - if (!rxr->rx_agg_bmap) - return -ENOMEM; - - return 0; -} - static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) { struct bnxt_rx_ring_info *rxr, *clone; From bd649c5cc958169b8a8a3e77ea926d92d472b02a Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 3 Dec 2024 20:10:22 -0800 Subject: [PATCH 284/366] bnxt_en: handle tpa_info in queue API implementation Commit 7ed816be35ab ("eth: bnxt: use page pool for head frags") added a page pool for header frags, which may be distinct from the existing pool for the aggregation ring. Prior to this change, frags used in the TPA ring rx_tpa were allocated from system memory e.g. napi_alloc_frag() meaning their lifetimes were not associated with a page pool. They can be returned at any time and so the queue API did not alloc or free rx_tpa. But now frags come from a separate head_pool which may be different to page_pool. Without allocating and freeing rx_tpa, frags allocated from the old head_pool may be returned to a different new head_pool which causes a mismatch between the pp hold/release count. Fix this problem by properly freeing and allocating rx_tpa in the queue API implementation. Signed-off-by: David Wei Reviewed-by: Michael Chan Link: https://patch.msgid.link/20241204041022.56512-4-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 +++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8031ff31f8377..6b963086c1d3b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3710,7 +3710,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (rxr->page_pool != rxr->head_pool) + if (bnxt_separate_head_pool()) page_pool_destroy(rxr->head_pool); rxr->page_pool = rxr->head_pool = NULL; @@ -15388,15 +15388,25 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) goto err_free_rx_agg_ring; } + if (bp->flags & BNXT_FLAG_TPA) { + rc = bnxt_alloc_one_tpa_info(bp, clone); + if (rc) + goto err_free_tpa_info; + } + bnxt_init_one_rx_ring_rxbd(bp, clone); bnxt_init_one_rx_agg_ring_rxbd(bp, clone); bnxt_alloc_one_rx_ring_skb(bp, clone, idx); if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_alloc_one_rx_ring_page(bp, clone, idx); + if (bp->flags & BNXT_FLAG_TPA) + bnxt_alloc_one_tpa_info_data(bp, clone); return 0; +err_free_tpa_info: + bnxt_free_one_tpa_info(bp, clone); err_free_rx_agg_ring: bnxt_free_ring(bp, &clone->rx_agg_ring_struct.ring_mem); err_free_rx_ring: @@ -15404,9 +15414,11 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) err_rxq_info_unreg: xdp_rxq_info_unreg(&clone->xdp_rxq); err_page_pool_destroy: - clone->page_pool->p.napi = NULL; page_pool_destroy(clone->page_pool); + if (bnxt_separate_head_pool()) + page_pool_destroy(clone->head_pool); clone->page_pool = NULL; + clone->head_pool = NULL; return rc; } @@ -15416,13 +15428,15 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) struct bnxt *bp = netdev_priv(dev); struct bnxt_ring_struct *ring; - bnxt_free_one_rx_ring(bp, rxr); - bnxt_free_one_rx_agg_ring(bp, rxr); + bnxt_free_one_rx_ring_skbs(bp, rxr); xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); + if (bnxt_separate_head_pool()) + page_pool_destroy(rxr->head_pool); rxr->page_pool = NULL; + rxr->head_pool = NULL; ring = &rxr->rx_ring_struct; bnxt_free_ring(bp, &ring->ring_mem); @@ -15504,7 +15518,10 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) rxr->rx_agg_prod = clone->rx_agg_prod; rxr->rx_sw_agg_prod = clone->rx_sw_agg_prod; rxr->rx_next_cons = clone->rx_next_cons; + rxr->rx_tpa = clone->rx_tpa; + rxr->rx_tpa_idx_map = clone->rx_tpa_idx_map; rxr->page_pool = clone->page_pool; + rxr->head_pool = clone->head_pool; rxr->xdp_rxq = clone->xdp_rxq; bnxt_copy_rx_ring(bp, rxr, clone); @@ -15563,6 +15580,8 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); rxr->rx_next_cons = 0; page_pool_disable_direct_recycling(rxr->page_pool); + if (bnxt_separate_head_pool()) + page_pool_disable_direct_recycling(rxr->head_pool); memcpy(qmem, rxr, sizeof(*rxr)); bnxt_init_rx_ring_struct(bp, qmem); From 530b69a26952c95ffc9e6dcd1cff6f249032780a Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 3 Dec 2024 22:49:15 +0200 Subject: [PATCH 285/366] net/mlx5: HWS: Fix memory leak in mlx5hws_definer_calc_layout It allocates a match template, which creates a compressed definer fc struct, but that is not deallocated. This commit fixes that. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Cosmin Ratiu Reviewed-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index c00010ca86bdc..9fb059a6511f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -39,6 +39,8 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, } else { mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n"); } + } else { + kfree(mt->fc); } mlx5hws_match_template_destroy(mt); From 10e0f0c018d5ce5ba4f349875c67f99c3253b5c3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 3 Dec 2024 22:49:16 +0200 Subject: [PATCH 286/366] net/mlx5: HWS: Properly set bwc queue locks lock classes The mentioned "Fixes" patch forgot to do that. Fixes: 9addffa34359 ("net/mlx5: HWS, use lock classes for bwc locks") Signed-off-by: Cosmin Ratiu Reviewed-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 424797b6d8020..883b4ed30892c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -990,6 +990,7 @@ static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx) for (i = 0; i < bwc_queues; i++) { mutex_init(&ctx->bwc_send_queue_locks[i]); lockdep_register_key(ctx->bwc_lock_class_keys + i); + lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i); } return 0; From 5f9b2bf019b7fb58e883dca7522c606c52612ea4 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 3 Dec 2024 22:49:17 +0200 Subject: [PATCH 287/366] net/mlx5: E-Switch, Fix switching to switchdev mode with IB device disabled In case that IB device is already disabled when moving to switchdev mode, which can happen when working with LAG, need to do rescan_drivers() before leaving in order to add ethernet representor auxiliary device. Fixes: ab85ebf43723 ("net/mlx5: E-switch, refactor eswitch mode change") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d6ff2dc4c19e9..5213d5b2cad58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2338,6 +2338,7 @@ static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) { esw->mode = mode; + mlx5_rescan_drivers_locked(esw->dev); mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); return; } From d04c81a3e3ced59f68018a7906c82e421bf748a5 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 3 Dec 2024 22:49:18 +0200 Subject: [PATCH 288/366] net/mlx5: E-Switch, Fix switching to switchdev mode in MPV Fix the mentioned commit change for MPV mode, since in MPV mode the IB device is shared between different core devices, so under this change when moving both devices simultaneously to switchdev mode the IB device removal and re-addition can race with itself causing unexpected behavior. In such case do rescan_drivers() only once in order to add the ethernet representor auxiliary device, and skip adding and removing IB devices. Fixes: ab85ebf43723 ("net/mlx5: E-switch, refactor eswitch mode change") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5213d5b2cad58..d5b42b3a19fdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2335,8 +2335,8 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) { mlx5_devcom_comp_lock(esw->dev->priv.hca_devcom_comp); - - if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) { + if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV || + mlx5_core_mp_enabled(esw->dev)) { esw->mode = mode; mlx5_rescan_drivers_locked(esw->dev); mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); From 31f114c3d158dacbeda33f2afb0ca41f4ec6c9f9 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 3 Dec 2024 22:49:19 +0200 Subject: [PATCH 289/366] net/mlx5e: SD, Use correct mdev to build channel param In a multi-PF netdev, each traffic channel creates its own resources against a specific PF. In the cited commit, where this support was added, the channel_param logic was mistakenly kept unchanged, so it always used the primary PF which is found at priv->mdev. In this patch we fix this by moving the logic to be per-channel, and passing the correct mdev instance. This bug happened to be usually harmless, as the resulting cparam structures would be the same for all channels, due to identical FW logic and decisions. However, in some use cases, like fwreset, this gets broken. This could lead to different symptoms. Example: Error cqe on cqn 0x428, ci 0x0, qn 0x10a9, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 Fixes: e4f9686bdee7 ("net/mlx5e: Let channels be SD-aware") Signed-off-by: Tariq Toukan Reviewed-by: Lama Kayal Reviewed-by: Gal Pressman Link: https://patch.msgid.link/20241203204920.232744-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d0b80b520397f..dd16d73000c31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2680,11 +2680,11 @@ void mlx5e_trigger_napi_sched(struct napi_struct *napi) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { struct net_device *netdev = priv->netdev; + struct mlx5e_channel_param *cparam; struct mlx5_core_dev *mdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; @@ -2706,8 +2706,15 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return err; c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!c || !cparam) { + err = -ENOMEM; + goto err_free; + } + + err = mlx5e_build_channel_param(mdev, params, cparam); + if (err) + goto err_free; c->priv = priv; c->mdev = mdev; @@ -2741,6 +2748,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, *cp = c; + kvfree(cparam); return 0; err_close_queues: @@ -2749,6 +2757,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_napi_del: netif_napi_del(&c->napi); +err_free: + kvfree(cparam); kvfree(c); return err; @@ -2807,20 +2817,14 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { - struct mlx5e_channel_param *cparam; int err = -ENOMEM; int i; chs->num = chs->params.num_channels; chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); - cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); - if (!chs->c || !cparam) - goto err_free; - - err = mlx5e_build_channel_param(priv->mdev, &chs->params, cparam); - if (err) - goto err_free; + if (!chs->c) + goto err_out; for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2828,7 +2832,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (chs->params.xdp_prog) xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -2846,7 +2850,6 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, } mlx5e_health_channels_update(priv); - kvfree(cparam); return 0; err_close_ptp: @@ -2857,9 +2860,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); -err_free: kfree(chs->c); - kvfree(cparam); +err_out: chs->num = 0; return err; } From 5085f861b414e4a51ce28a891dfa32a10a54b64e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 3 Dec 2024 22:49:20 +0200 Subject: [PATCH 290/366] net/mlx5e: Remove workaround to avoid syndrome for internal port Previously a workaround was added to avoid syndrome 0xcdb051. It is triggered when offload a rule with tunnel encapsulation, and forwarding to another table, but not matching on the internal port in firmware steering mode. The original workaround skips internal tunnel port logic, which is not correct as not all cases are considered. As an example, if vlan is configured on the uplink port, traffic can't pass because vlan header is not added with this workaround. Besides, there is no such issue for software steering. So, this patch removes that, and returns error directly if trying to offload such rule for firmware steering. Fixes: 06b4eac9c4be ("net/mlx5e: Don't offload internal port if filter device is out device") Signed-off-by: Jianbo Liu Tested-by: Frode Nordahl Reviewed-by: Chris Mi Reviewed-by: Ariel Levkovich Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 878cbdbf5ec8b..e7e01f3298efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -5,6 +5,7 @@ #include #include #include "tc_tun_encap.h" +#include "fs_core.h" #include "en_tc.h" #include "tc_tun.h" #include "rep/tc.h" @@ -24,10 +25,18 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev) || - attr->parse_attr->filter_dev == e->out_dev) + if (!route_dev || !netif_is_ovs_master(route_dev)) goto out; + if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS && + mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) && + (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) { + mlx5_core_warn(priv->mdev, + "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n"); + err = -EINVAL; + goto out; + } + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, MLX5E_TC_INT_PORT_EGRESS, &attr->action, out_index); From d9381508ea2b590aff46d28d432d20bfef1ba64c Mon Sep 17 00:00:00 2001 From: Yafang shao Date: Thu, 5 Dec 2024 10:11:00 +0800 Subject: [PATCH 291/366] audit: workaround a GCC bug triggered by task comm changes A build failure has been reported with the following details: In file included from include/linux/string.h:390, from include/linux/bitmap.h:13, from include/linux/cpumask.h:12, from include/linux/smp.h:13, from include/linux/lockdep.h:14, from include/linux/spinlock.h:63, from include/linux/wait.h:9, from include/linux/wait_bit.h:8, from include/linux/fs.h:6, from kernel/auditsc.c:37: In function 'sized_strscpy', inlined from '__audit_ptrace' at kernel/auditsc.c:2732:2: >> include/linux/fortify-string.h:293:17: error: call to '__write_overflow' declared with attribute error: detected write beyond size of object (1st parameter) 293 | __write_overflow(); | ^~~~~~~~~~~~~~~~~~ In function 'sized_strscpy', inlined from 'audit_signal_info_syscall' at kernel/auditsc.c:2759:3: >> include/linux/fortify-string.h:293:17: error: call to '__write_overflow' declared with attribute error: detected write beyond size of object (1st parameter) 293 | __write_overflow(); | ^~~~~~~~~~~~~~~~~~ The issue appears to be a GCC bug, though the root cause remains unclear at this time. For now, let's implement a workaround. A bug report has also been filed with GCC [0]. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117912 [0] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410171420.1V00ICVG-lkp@intel.com/ Reported-by: Steven Rostedt (Google) Closes: https://lore.kernel.org/all/20241128182435.57a1ea6f@gandalf.local.home/ Reported-by: Zhuo, Qiuxu Closes: https://lore.kernel.org/all/CY8PR11MB71348E568DBDA576F17DAFF389362@CY8PR11MB7134.namprd11.prod.outlook.com/ Originally-by: Kees Cook Link: https://lore.kernel.org/linux-hardening/202410171059.C2C395030@keescook/ Signed-off-by: Yafang shao Tested-by: Steven Rostedt (Google) Tested-by: Yafang Shao [PM: subject tweak, description line wrapping] Signed-off-by: Paul Moore --- kernel/auditsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 279ba5c420a49..561d96affe9f5 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2728,8 +2728,8 @@ void __audit_ptrace(struct task_struct *t) context->target_auid = audit_get_loginuid(t); context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); - security_task_getlsmprop_obj(t, &context->target_ref); strscpy(context->target_comm, t->comm); + security_task_getlsmprop_obj(t, &context->target_ref); } /** @@ -2755,8 +2755,8 @@ int audit_signal_info_syscall(struct task_struct *t) ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); - security_task_getlsmprop_obj(t, &ctx->target_ref); strscpy(ctx->target_comm, t->comm); + security_task_getlsmprop_obj(t, &ctx->target_ref); return 0; } From 7ce1c0921a806ee7d4bb24f74a3b30c89fc5fb39 Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Tue, 3 Dec 2024 09:06:54 -0600 Subject: [PATCH 292/366] vsock/test: fix failures due to wrong SO_RCVLOWAT parameter This happens on 64-bit big-endian machines. SO_RCVLOWAT requires an int parameter. However, instead of int, the test uses unsigned long in one place and size_t in another. Both are 8 bytes long on 64-bit machines. The kernel, having received the 8 bytes, doesn't test for the exact size of the parameter, it only cares that it's >= sizeof(int), and casts the 4 lower-addressed bytes to an int, which, on a big-endian machine, contains 0. 0 doesn't trigger an error, SO_RCVLOWAT returns with success and the socket stays with the default SO_RCVLOWAT = 1, which results in vsock_test failures, while vsock_perf doesn't even notice that it's failed to change it. Fixes: b1346338fbae ("vsock_test: POLLIN + SO_RCVLOWAT test") Fixes: 542e893fbadc ("vsock/test: two tests to check credit update logic") Fixes: 8abbffd27ced ("test/vsock: vsock_perf utility") Signed-off-by: Konstantin Shkolnyy Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_perf.c | 6 +++--- tools/testing/vsock/vsock_test.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index 4e8578f815e08..22633c2848cc7 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -133,7 +133,7 @@ static float get_gbps(unsigned long bits, time_t ns_delta) ((float)ns_delta / NSEC_PER_SEC); } -static void run_receiver(unsigned long rcvlowat_bytes) +static void run_receiver(int rcvlowat_bytes) { unsigned int read_cnt; time_t rx_begin_ns; @@ -163,7 +163,7 @@ static void run_receiver(unsigned long rcvlowat_bytes) printf("Listen port %u\n", port); printf("RX buffer %lu bytes\n", buf_size_bytes); printf("vsock buffer %lu bytes\n", vsock_buf_bytes); - printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + printf("SO_RCVLOWAT %d bytes\n", rcvlowat_bytes); fd = socket(AF_VSOCK, SOCK_STREAM, 0); @@ -439,7 +439,7 @@ static long strtolx(const char *arg) int main(int argc, char **argv) { unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; - unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; int peer_cid = -1; bool sender = false; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 8d38dbf8f41f0..7fd25b814b4b6 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -835,7 +835,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) { - unsigned long lowat_val = RCVLOWAT_BUF_SIZE; + int lowat_val = RCVLOWAT_BUF_SIZE; char buf[RCVLOWAT_BUF_SIZE]; struct pollfd fds; short poll_flags; @@ -1357,7 +1357,7 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt static void test_stream_credit_update_test(const struct test_opts *opts, bool low_rx_bytes_test) { - size_t recv_buf_size; + int recv_buf_size; struct pollfd fds; size_t buf_size; void *buf; From 3f36ee29e732b68044d531f47d31f22d265954c6 Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Tue, 3 Dec 2024 09:06:55 -0600 Subject: [PATCH 293/366] vsock/test: fix parameter types in SO_VM_SOCKETS_* calls Change parameters of SO_VM_SOCKETS_* to unsigned long long as documented in the vm_sockets.h, because the corresponding kernel code requires them to be at least 64-bit, no matter what architecture. Otherwise they are too small on 32-bit machines. Fixes: 5c338112e48a ("test/vsock: rework message bounds test") Fixes: 685a21c314a8 ("test/vsock: add big message test") Fixes: 542e893fbadc ("vsock/test: two tests to check credit update logic") Fixes: 8abbffd27ced ("test/vsock: vsock_perf utility") Signed-off-by: Konstantin Shkolnyy Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_perf.c | 4 ++-- tools/testing/vsock/vsock_test.c | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index 22633c2848cc7..8e0a6c0770d37 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -33,7 +33,7 @@ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; -static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static unsigned long long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; static bool zerocopy; static void error(const char *s) @@ -162,7 +162,7 @@ static void run_receiver(int rcvlowat_bytes) printf("Run as receiver\n"); printf("Listen port %u\n", port); printf("RX buffer %lu bytes\n", buf_size_bytes); - printf("vsock buffer %lu bytes\n", vsock_buf_bytes); + printf("vsock buffer %llu bytes\n", vsock_buf_bytes); printf("SO_RCVLOWAT %d bytes\n", rcvlowat_bytes); fd = socket(AF_VSOCK, SOCK_STREAM, 0); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 7fd25b814b4b6..0b7f5bf546da5 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -429,7 +429,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; unsigned long remote_hash; unsigned long curr_hash; int fd; @@ -634,7 +634,8 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) static void test_seqpacket_bigmsg_client(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; + size_t buf_size; socklen_t len; void *data; int fd; @@ -655,13 +656,20 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) sock_buf_size++; - data = malloc(sock_buf_size); + /* size_t can be < unsigned long long */ + buf_size = (size_t)sock_buf_size; + if (buf_size != sock_buf_size) { + fprintf(stderr, "Returned BUFFER_SIZE too large\n"); + exit(EXIT_FAILURE); + } + + data = malloc(buf_size); if (!data) { perror("malloc"); exit(EXIT_FAILURE); } - send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE); + send_buf(fd, data, buf_size, 0, -EMSGSIZE); control_writeln("CLISENT"); @@ -1360,6 +1368,7 @@ static void test_stream_credit_update_test(const struct test_opts *opts, int recv_buf_size; struct pollfd fds; size_t buf_size; + unsigned long long sock_buf_size; void *buf; int fd; @@ -1371,8 +1380,11 @@ static void test_stream_credit_update_test(const struct test_opts *opts, buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; + /* size_t can be < unsigned long long */ + sock_buf_size = buf_size; + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &buf_size, sizeof(buf_size))) { + &sock_buf_size, sizeof(sock_buf_size))) { perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); exit(EXIT_FAILURE); } From 86814d8ffd55fd4ad19c512eccd721522a370fb2 Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Tue, 3 Dec 2024 09:06:56 -0600 Subject: [PATCH 294/366] vsock/test: verify socket options after setting them Replace setsockopt() calls with calls to functions that follow setsockopt() with getsockopt() and check that the returned value and its size are the same as have been set. (Except in vsock_perf.) Signed-off-by: Konstantin Shkolnyy Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- tools/testing/vsock/control.c | 9 +- tools/testing/vsock/msg_zerocopy_common.c | 10 -- tools/testing/vsock/msg_zerocopy_common.h | 1 - tools/testing/vsock/util.c | 142 ++++++++++++++++++++++ tools/testing/vsock/util.h | 7 ++ tools/testing/vsock/vsock_perf.c | 10 ++ tools/testing/vsock/vsock_test.c | 51 +++----- tools/testing/vsock/vsock_test_zerocopy.c | 2 +- tools/testing/vsock/vsock_uring_test.c | 2 +- 9 files changed, 181 insertions(+), 53 deletions(-) diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index d2deb4b15b943..0066e0324d35c 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -27,6 +27,7 @@ #include "timeout.h" #include "control.h" +#include "util.h" static int control_fd = -1; @@ -50,7 +51,6 @@ void control_init(const char *control_host, for (ai = result; ai; ai = ai->ai_next) { int fd; - int val = 1; fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (fd < 0) @@ -65,11 +65,8 @@ void control_init(const char *control_host, break; } - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &val, sizeof(val)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_REUSEADDR, 1, + "setsockopt SO_REUSEADDR"); if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) goto next; diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c index 5a4bdf7b51328..8622e5a0f8b77 100644 --- a/tools/testing/vsock/msg_zerocopy_common.c +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -14,16 +14,6 @@ #include "msg_zerocopy_common.h" -void enable_so_zerocopy(int fd) -{ - int val = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } -} - void vsock_recv_completion(int fd, const bool *zerocopied) { struct sock_extended_err *serr; diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h index 3763c5ccedb95..ad14139e93ca3 100644 --- a/tools/testing/vsock/msg_zerocopy_common.h +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -12,7 +12,6 @@ #define VSOCK_RECVERR 1 #endif -void enable_so_zerocopy(int fd); void vsock_recv_completion(int fd, const bool *zerocopied); #endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index a3d448a075e3a..34e9dac0a105f 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -651,3 +651,145 @@ void free_test_iovec(const struct iovec *test_iovec, free(iovec); } + +/* Set "unsigned long long" socket option and check that it's indeed set */ +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg) +{ + unsigned long long chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %llu got %llu\n", val, + chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %llu\n", errmsg, val); + exit(EXIT_FAILURE); +; +} + +/* Set "int" socket option and check that it's indeed set */ +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg) +{ + int chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %d got %d\n", val, chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %d\n", errmsg, val); + exit(EXIT_FAILURE); +} + +static void mem_invert(unsigned char *mem, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + mem[i] = ~mem[i]; +} + +/* Set "timeval" socket option and check that it's indeed set */ +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg) +{ + struct timeval chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + /* just make storage != val */ + chkval = val; + mem_invert((unsigned char *)&chkval, sizeof(chkval)); + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (memcmp(&chkval, &val, sizeof(val)) != 0) { + fprintf(stderr, "value mismatch: set %ld:%ld got %ld:%ld\n", + val.tv_sec, val.tv_usec, chkval.tv_sec, chkval.tv_usec); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %ld:%ld\n", errmsg, val.tv_sec, val.tv_usec); + exit(EXIT_FAILURE); +} + +void enable_so_zerocopy_check(int fd) +{ + setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, + "setsockopt SO_ZEROCOPY"); +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index fff22d4a14c0f..ba84d296d8b71 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -68,4 +68,11 @@ unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); void free_test_iovec(const struct iovec *test_iovec, struct iovec *iovec, int iovnum); +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg); +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg); +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg); +void enable_so_zerocopy_check(int fd); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index 8e0a6c0770d37..75971ac708c9a 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -251,6 +251,16 @@ static void run_receiver(int rcvlowat_bytes) close(fd); } +static void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + static void run_sender(int peer_cid, unsigned long to_send_bytes) { time_t tx_begin_ns; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 0b7f5bf546da5..48f17641ca504 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -444,17 +444,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) sock_buf_size = SOCK_BUF_SIZE; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); /* Ready to receive data. */ control_writeln("SRVREADY"); @@ -586,10 +582,8 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_sec = RCVTIMEO_TIMEOUT_SEC; tv.tv_usec = 0; - if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt(SO_RCVTIMEO)"); - exit(EXIT_FAILURE); - } + setsockopt_timeval_check(fd, SOL_SOCKET, SO_RCVTIMEO, tv, + "setsockopt(SO_RCVTIMEO)"); read_enter_ns = current_nsec(); @@ -855,11 +849,8 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &lowat_val, sizeof(lowat_val))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + lowat_val, "setsockopt(SO_RCVLOWAT)"); control_expectln("SRVSENT"); @@ -1383,11 +1374,9 @@ static void test_stream_credit_update_test(const struct test_opts *opts, /* size_t can be < unsigned long long */ sock_buf_size = buf_size; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); if (low_rx_bytes_test) { /* Set new SO_RCVLOWAT here. This enables sending credit @@ -1396,11 +1385,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, */ recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } /* Send one dummy byte here, because 'setsockopt()' above also @@ -1442,11 +1428,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, recv_buf_size++; /* Updating SO_RCVLOWAT will send credit update. */ - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } fds.fd = fd; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index 04c376b6937f5..9d9a6cb9614ad 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -162,7 +162,7 @@ static void test_client(const struct test_opts *opts, } if (test_data->so_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c index 6c3e6f70c457d..5c3078969659f 100644 --- a/tools/testing/vsock/vsock_uring_test.c +++ b/tools/testing/vsock/vsock_uring_test.c @@ -73,7 +73,7 @@ static void vsock_io_uring_client(const struct test_opts *opts, } if (msg_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); From 750e51603395e755537da08f745864c93e3ce741 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Dec 2024 17:09:33 +0000 Subject: [PATCH 295/366] net: avoid potential UAF in default_operstate() syzbot reported an UAF in default_operstate() [1] Issue is a race between device and netns dismantles. After calling __rtnl_unlock() from netdev_run_todo(), we can not assume the netns of each device is still alive. Make sure the device is not in NETREG_UNREGISTERED state, and add an ASSERT_RTNL() before the call to __dev_get_by_index(). We might move this ASSERT_RTNL() in __dev_get_by_index() in the future. [1] BUG: KASAN: slab-use-after-free in __dev_get_by_index+0x5d/0x110 net/core/dev.c:852 Read of size 8 at addr ffff888043eba1b0 by task syz.0.0/5339 CPU: 0 UID: 0 PID: 5339 Comm: syz.0.0 Not tainted 6.12.0-syzkaller-10296-gaaf20f870da0 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x169/0x550 mm/kasan/report.c:489 kasan_report+0x143/0x180 mm/kasan/report.c:602 __dev_get_by_index+0x5d/0x110 net/core/dev.c:852 default_operstate net/core/link_watch.c:51 [inline] rfc2863_policy+0x224/0x300 net/core/link_watch.c:67 linkwatch_do_dev+0x3e/0x170 net/core/link_watch.c:170 netdev_run_todo+0x461/0x1000 net/core/dev.c:10894 rtnl_unlock net/core/rtnetlink.c:152 [inline] rtnl_net_unlock include/linux/rtnetlink.h:133 [inline] rtnl_dellink+0x760/0x8d0 net/core/rtnetlink.c:3520 rtnetlink_rcv_msg+0x791/0xcf0 net/core/rtnetlink.c:6911 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2541 netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1347 netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1891 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:726 ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2583 ___sys_sendmsg net/socket.c:2637 [inline] __sys_sendmsg+0x269/0x350 net/socket.c:2669 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f2a3cb80809 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f2a3d9cd058 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f2a3cd45fa0 RCX: 00007f2a3cb80809 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000008 RBP: 00007f2a3cbf393e R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f2a3cd45fa0 R15: 00007ffd03bc65c8 Allocated by task 5339: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x243/0x390 mm/slub.c:4314 kmalloc_noprof include/linux/slab.h:901 [inline] kmalloc_array_noprof include/linux/slab.h:945 [inline] netdev_create_hash net/core/dev.c:11870 [inline] netdev_init+0x10c/0x250 net/core/dev.c:11890 ops_init+0x31e/0x590 net/core/net_namespace.c:138 setup_net+0x287/0x9e0 net/core/net_namespace.c:362 copy_net_ns+0x33f/0x570 net/core/net_namespace.c:500 create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228 ksys_unshare+0x57d/0xa70 kernel/fork.c:3314 __do_sys_unshare kernel/fork.c:3385 [inline] __se_sys_unshare kernel/fork.c:3383 [inline] __x64_sys_unshare+0x38/0x40 kernel/fork.c:3383 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 12: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:582 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2338 [inline] slab_free mm/slub.c:4598 [inline] kfree+0x196/0x420 mm/slub.c:4746 netdev_exit+0x65/0xd0 net/core/dev.c:11992 ops_exit_list net/core/net_namespace.c:172 [inline] cleanup_net+0x802/0xcc0 net/core/net_namespace.c:632 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa63/0x1850 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 The buggy address belongs to the object at ffff888043eba000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 432 bytes inside of freed 2048-byte region [ffff888043eba000, ffff888043eba800) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x43eb8 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x4fff00000000040(head|node=1|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 04fff00000000040 ffff88801ac42000 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000 head: 04fff00000000040 ffff88801ac42000 dead000000000122 0000000000000000 head: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000 head: 04fff00000000003 ffffea00010fae01 ffffffffffffffff 0000000000000000 head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5339, tgid 5338 (syz.0.0), ts 69674195892, free_ts 69663220888 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1556 prep_new_page mm/page_alloc.c:1564 [inline] get_page_from_freelist+0x3649/0x3790 mm/page_alloc.c:3474 __alloc_pages_noprof+0x292/0x710 mm/page_alloc.c:4751 alloc_pages_mpol_noprof+0x3e8/0x680 mm/mempolicy.c:2265 alloc_slab_page+0x6a/0x140 mm/slub.c:2408 allocate_slab+0x5a/0x2f0 mm/slub.c:2574 new_slab mm/slub.c:2627 [inline] ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3815 __slab_alloc+0x58/0xa0 mm/slub.c:3905 __slab_alloc_node mm/slub.c:3980 [inline] slab_alloc_node mm/slub.c:4141 [inline] __do_kmalloc_node mm/slub.c:4282 [inline] __kmalloc_noprof+0x2e6/0x4c0 mm/slub.c:4295 kmalloc_noprof include/linux/slab.h:905 [inline] sk_prot_alloc+0xe0/0x210 net/core/sock.c:2165 sk_alloc+0x38/0x370 net/core/sock.c:2218 __netlink_create+0x65/0x260 net/netlink/af_netlink.c:629 __netlink_kernel_create+0x174/0x6f0 net/netlink/af_netlink.c:2015 netlink_kernel_create include/linux/netlink.h:62 [inline] uevent_net_init+0xed/0x2d0 lib/kobject_uevent.c:783 ops_init+0x31e/0x590 net/core/net_namespace.c:138 setup_net+0x287/0x9e0 net/core/net_namespace.c:362 page last free pid 1032 tgid 1032 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1127 [inline] free_unref_page+0xdf9/0x1140 mm/page_alloc.c:2657 __slab_free+0x31b/0x3d0 mm/slub.c:4509 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x9a/0x140 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:329 kasan_slab_alloc include/linux/kasan.h:250 [inline] slab_post_alloc_hook mm/slub.c:4104 [inline] slab_alloc_node mm/slub.c:4153 [inline] kmem_cache_alloc_node_noprof+0x1d9/0x380 mm/slub.c:4205 __alloc_skb+0x1c3/0x440 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1323 [inline] alloc_skb_with_frags+0xc3/0x820 net/core/skbuff.c:6612 sock_alloc_send_pskb+0x91a/0xa60 net/core/sock.c:2881 sock_alloc_send_skb include/net/sock.h:1797 [inline] mld_newpack+0x1c3/0xaf0 net/ipv6/mcast.c:1747 add_grhead net/ipv6/mcast.c:1850 [inline] add_grec+0x1492/0x19a0 net/ipv6/mcast.c:1988 mld_send_initial_cr+0x228/0x4b0 net/ipv6/mcast.c:2234 ipv6_mc_dad_complete+0x88/0x490 net/ipv6/mcast.c:2245 addrconf_dad_completed+0x712/0xcd0 net/ipv6/addrconf.c:4342 addrconf_dad_work+0xdc2/0x16f0 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa63/0x1850 kernel/workqueue.c:3310 Memory state around the buggy address: ffff888043eba080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888043eba100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888043eba180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888043eba200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888043eba280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 8c55facecd7a ("net: linkwatch: only report IF_OPER_LOWERLAYERDOWN if iflink is actually down") Reported-by: syzbot+1939f24bdb783e9e43d9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/674f3a18.050a0220.48a03.0041.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241203170933.2449307-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/link_watch.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index ab150641142aa..1b4d39e380842 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -45,9 +45,14 @@ static unsigned int default_operstate(const struct net_device *dev) int iflink = dev_get_iflink(dev); struct net_device *peer; - if (iflink == dev->ifindex) + /* If called from netdev_run_todo()/linkwatch_sync_dev(), + * dev_net(dev) can be already freed, and RTNL is not held. + */ + if (dev->reg_state == NETREG_UNREGISTERED || + iflink == dev->ifindex) return IF_OPER_DOWN; + ASSERT_RTNL(); peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; From 31f1b55d5d7e531cd827419e5d71c19f24de161c Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Tue, 3 Dec 2024 21:48:20 -0800 Subject: [PATCH 296/366] net :mana :Request a V2 response version for MANA_QUERY_GF_STAT The current requested response version(V1) for MANA_QUERY_GF_STAT query results in STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR value being set to 0 always. In order to get the correct value for this counter we request the response version to be V2. Cc: stable@vger.kernel.org Fixes: e1df5202e879 ("net :mana :Add remaining GDMA stats for MANA to ethtool") Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/1733291300-12593-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 57ac732e77074..f73848a4feb3d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2536,6 +2536,7 @@ void mana_query_gf_stats(struct mana_port_context *apc) mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, sizeof(req), sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | STATISTICS_FLAGS_HC_RX_BYTES | From c2b46ae022704a2d845e59461fa24431ad627022 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Thu, 5 Dec 2024 09:33:31 +0800 Subject: [PATCH 297/366] ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A HiSilicon HIP09A platforms using the same SMMU PMCG with HIP09 and thus suffers the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. Reviewed-by: Yicong Yang Acked-by: Hanjun Guo Signed-off-by: Qinxin Xia Link: https://lore.kernel.org/r/20241205013331.1484017-1-xiaqinxin@huawei.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/silicon-errata.rst | 5 +++-- drivers/acpi/arm64/iort.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 77db10e944f03..b42fea07c5cec 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -255,8 +255,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | -| | ,11} SMMU PMCG | | | +| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A | +| | ,10C,11} | | | +| | SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 1f7e4c691d9ee..98759d6199d3a 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1716,6 +1716,8 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP09A ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, From c0900d15d31c2597dd9f634c8be2b71762199890 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 3 Dec 2024 15:19:41 +0000 Subject: [PATCH 298/366] arm64: Ensure bits ASID[15:8] are masked out when the kernel uses 8-bit ASIDs Linux currently sets the TCR_EL1.AS bit unconditionally during CPU bring-up. On an 8-bit ASID CPU, this is RES0 and ignored, otherwise 16-bit ASIDs are enabled. However, if running in a VM and the hypervisor reports 8-bit ASIDs (ID_AA64MMFR0_EL1.ASIDBits == 0) on a 16-bit ASIDs CPU, Linux uses bits 8 to 63 as a generation number for tracking old process ASIDs. The bottom 8 bits of this generation end up being written to TTBR1_EL1 and also used for the ASID-based TLBI operations as the upper 8 bits of the ASID. Following an ASID roll-over event we can have threads of the same application with the same 8-bit ASID but different generation numbers running on separate CPUs. Both TLB caching and the TLBI operations will end up using different actual 16-bit ASIDs for the same process. A similar scenario can happen in a big.LITTLE configuration if the boot CPU only uses 8-bit ASIDs while secondary CPUs have 16-bit ASIDs. Ensure that the ASID generation is only tracked by bits 16 and up, leaving bits 15:8 as 0 if the kernel uses 8-bit ASIDs. Note that clearing TCR_EL1.AS is not sufficient since the architecture requires that the top 8 bits of the ASID passed to TLBI instructions are 0 rather than ignored in such configuration. Cc: stable@vger.kernel.org Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: James Morse Acked-by: Mark Rutland Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20241203151941.353796-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 188197590fc9c..b2ac062463273 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -32,9 +32,9 @@ static unsigned long nr_pinned_asids; static unsigned long *pinned_asid_map; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) -#define ASID_FIRST_VERSION (1UL << asid_bits) +#define ASID_FIRST_VERSION (1UL << 16) -#define NUM_USER_ASIDS ASID_FIRST_VERSION +#define NUM_USER_ASIDS (1UL << asid_bits) #define ctxid2asid(asid) ((asid) & ~ASID_MASK) #define asid2ctxid(asid, genid) ((asid) | (genid)) From cf3b16dae4cab9dfa2436d76ce010ad4dd55b53a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 4 Dec 2024 17:50:04 +0000 Subject: [PATCH 299/366] arm64: mte: Fix copy_highpage() warning on hugetlb folios Commit 25c17c4b55de ("hugetlb: arm64: add mte support") improved the copy_highpage() function to update the tags in the destination hugetlb folio. However, when the source folio isn't tagged, the code takes the non-hugetlb path where try_page_mte_tagging() warns as the destination is a hugetlb folio: WARNING: CPU: 0 PID: 363 at arch/arm64/include/asm/mte.h:58 copy_highpage+0x1d4/0x2d8 [...] pc : copy_highpage+0x1d4/0x2d8 lr : copy_highpage+0x78/0x2d8 [...] Call trace: copy_highpage+0x1d4/0x2d8 (P) copy_highpage+0x78/0x2d8 (L) copy_user_highpage+0x20/0x48 copy_user_large_folio+0x1bc/0x268 hugetlb_wp+0x190/0x860 hugetlb_fault+0xa28/0xc10 handle_mm_fault+0x2a0/0x2c0 do_page_fault+0x12c/0x578 do_mem_abort+0x4c/0xa8 el0_da+0x44/0xb0 el0t_64_sync_handler+0xc4/0x138 el0t_64_sync+0x198/0x1a0 Change the check for the tagged status of the source folio so that it does not fall through the non-hugetlb case. In addition, only perform the copy (for the full folio) if the source page is the folio head and warn if the destination folio is already tagged, for symmetry with the non-hugetlb case. Fixes: 25c17c4b55de ("hugetlb: arm64: add mte support") Reported-by: Sasha Levin Cc: Yang Shi Cc: David Hildenbrand Cc: Will Deacon Link: https://lore.kernel.org/r/Z0STR6VLt2MCalnY@sashalap Link: https://lore.kernel.org/r/20241204175004.906754-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/copypage.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 87b3f1a255356..a86c897017df0 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -30,11 +30,13 @@ void copy_highpage(struct page *to, struct page *from) if (!system_supports_mte()) return; - if (folio_test_hugetlb(src) && - folio_test_hugetlb_mte_tagged(src)) { - if (!folio_try_hugetlb_mte_tagging(dst)) + if (folio_test_hugetlb(src)) { + if (!folio_test_hugetlb_mte_tagged(src) || + from != folio_page(src, 0)) return; + WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + /* * Populate tags for all subpages. * From b29bf7119d6bbfd04aabb8d82b060fe2a33ef890 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 3 Dec 2024 12:27:15 +0100 Subject: [PATCH 300/366] jffs2: Fix rtime decompressor The fix for a memory corruption contained a off-by-one error and caused the compressor to fail in legit cases. Cc: Kinsey Moore Cc: stable@vger.kernel.org Fixes: fe051552f5078 ("jffs2: Prevent rtime decompress memory corruption") Signed-off-by: Richard Weinberger --- fs/jffs2/compr_rtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 2b9ef713b844a..3bd9d2f3bece2 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c @@ -95,7 +95,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in, positions[value]=outpos; if (repeat) { - if ((outpos + repeat) >= destlen) { + if ((outpos + repeat) > destlen) { return 1; } if (backoffs + repeat >= outpos) { From 4c6ac5446d060f0bf435ccc8bc3aa7b7b5f718ad Mon Sep 17 00:00:00 2001 From: Purushothama Siddaiah Date: Thu, 5 Dec 2024 12:34:26 +0530 Subject: [PATCH 301/366] spi: omap2-mcspi: Fix the IS_ERR() bug for devm_clk_get_optional_enabled() The devm_clk_get_optional_enabled() function returns error pointers(PTR_ERR()). So use IS_ERR() to check it. Verified on K3-J7200 EVM board, without clock node mentioned in the device tree. Signed-off-by: Purushothama Siddaiah Reviewed-by: Corey Minyard Link: https://patch.msgid.link/20241205070426.1861048-1-psiddaiah@mvista.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index e2400a067a956..add6247d34819 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1561,10 +1561,10 @@ static int omap2_mcspi_probe(struct platform_device *pdev) } mcspi->ref_clk = devm_clk_get_optional_enabled(&pdev->dev, NULL); - if (mcspi->ref_clk) - mcspi->ref_clk_hz = clk_get_rate(mcspi->ref_clk); - else + if (IS_ERR(mcspi->ref_clk)) mcspi->ref_clk_hz = OMAP2_MCSPI_MAX_FREQ; + else + mcspi->ref_clk_hz = clk_get_rate(mcspi->ref_clk); ctlr->max_speed_hz = mcspi->ref_clk_hz; ctlr->min_speed_hz = mcspi->ref_clk_hz >> 15; From d0ceea662d459726487030237689835fcc0483e5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 4 Dec 2024 11:27:14 +0000 Subject: [PATCH 302/366] x86/mm: Add _PAGE_NOPTISHADOW bit to avoid updating userspace page tables The set_p4d() and set_pgd() functions (in 4-level or 5-level page table setups respectively) assume that the root page table is actually a 8KiB allocation, with the userspace root immediately after the kernel root page table (so that the former can enforce NX on on all the subordinate page tables, which are actually shared). However, users of the kernel_ident_mapping_init() code do not give it an 8KiB allocation for its PGD. Both swsusp_arch_resume() and acpi_mp_setup_reset() allocate only a single 4KiB page. The kexec code on x86_64 currently gets away with it purely by chance, because it allocates 8KiB for its "control code page" and then actually uses the first half for the PGD, then copies the actual trampoline code into the second half only after the identmap code has finished scribbling over it. Fix this by defining a _PAGE_NOPTISHADOW bit (which can use the same bit as _PAGE_SAVED_DIRTY since one is only for the PGD/P4D root and the other is exclusively for leaf PTEs.). This instructs __pti_set_user_pgtbl() not to write to the userspace 'shadow' PGD. Strictly, the _PAGE_NOPTISHADOW bit doesn't need to be written out to the actual page tables; since __pti_set_user_pgtbl() returns the value to be written to the kernel page table, it could be filtered out. But there seems to be no benefit to actually doing so. Suggested-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/412c90a4df7aef077141d9f68d19cbe5602d6c6d.camel@infradead.org Cc: stable@kernel.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Rik van Riel --- arch/x86/include/asm/pgtable_types.h | 8 ++++++-- arch/x86/mm/ident_map.c | 6 +++--- arch/x86/mm/pti.c | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6f82e75b61494..4b804531b03c3 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -36,10 +36,12 @@ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ #else /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ #endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -139,6 +141,8 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) + /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 437e96fb49773..5ab7bd2f1983c 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, if (result) return result; - set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } return 0; @@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, if (result) return result; if (pgtable_l5_enabled()) { - set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } else { /* * With p4d folded, pgd is equal to p4d. * The pgd entry has to point to the pud page table in this case. */ pud_t *pud = pud_offset(p4d, 0); - set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 851ec8f1363a8..5f0d579932c68 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) * Top-level entries added to init_mm's usermode pgd after boot * will not be automatically propagated to other mms. */ - if (!pgdp_maps_userspace(pgdp)) + if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW)) return pgd; /* From ec16a3cdf37e507013062f9c4a2067eacdd12b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 3 Dec 2024 16:20:58 -0300 Subject: [PATCH 303/366] ASoC: mediatek: mt8188-mt6359: Remove hardcoded dmic codec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove hardcoded dmic codec from the UL_SRC dai link to avoid requiring a dmic codec to be present for the driver to probe, as not every MT8188-based platform might need a dmic codec. The codec can be assigned to the dai link through the dai-link property in Devicetree on the platforms where it is needed. No Devicetree currently relies on it so it is safe to remove without worrying about backward compatibility. Fixes: 9f08dcbddeb3 ("ASoC: mediatek: mt8188-mt6359: support new board with nau88255") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20241203-mt8188-6359-unhardcode-dmic-v1-1-346e3e5cbe6d@collabora.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8188/mt8188-mt6359.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/mediatek/mt8188/mt8188-mt6359.c b/sound/soc/mediatek/mt8188/mt8188-mt6359.c index e04b88a57535c..2d0d04e0232da 100644 --- a/sound/soc/mediatek/mt8188/mt8188-mt6359.c +++ b/sound/soc/mediatek/mt8188/mt8188-mt6359.c @@ -188,9 +188,7 @@ SND_SOC_DAILINK_DEFS(pcm1, SND_SOC_DAILINK_DEFS(ul_src, DAILINK_COMP_ARRAY(COMP_CPU("UL_SRC")), DAILINK_COMP_ARRAY(COMP_CODEC("mt6359-sound", - "mt6359-snd-codec-aif1"), - COMP_CODEC("dmic-codec", - "dmic-hifi")), + "mt6359-snd-codec-aif1")), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(AFE_SOF_DL2, From 20c3b3e5f2641eff3d85f33e6a468ac052b169bd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Dec 2024 10:28:33 +0000 Subject: [PATCH 304/366] ALSA: hda/realtek: Fix spelling mistake "Firelfy" -> "Firefly" There is a spelling mistake in a literal string in the alc269_fixup_tbl quirk table. Fix it. Fixes: 0d08f0eec961 ("ALSA: hda/realtek: fix micmute LEDs don't work on HP Laptops") Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20241205102833.476190-1-colin.i.king@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 37e1141e201e5..4e58c6810f221 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10527,7 +10527,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e18, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firelfy 14 G12A", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), From 73da582a476ea6e3512f89f8ed57dfed945829a2 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 2 Dec 2024 14:58:45 +0000 Subject: [PATCH 305/366] x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC The rework of possible CPUs management erroneously disabled SMP when the IO/APIC is disabled either by the 'noapic' command line parameter or during IO/APIC setup. SMP is possible without IO/APIC. Remove the ioapic_is_disabled conditions from the relevant possible CPU management code paths to restore the orgininal behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241202145905.1482-1-ffmancera@riseup.net --- arch/x86/kernel/cpu/topology.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 621a151ccf7d0..b2e313ea17bf6 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void) { unsigned int possible = nr_cpu_ids; - /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ - if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ + if (!setup_max_cpus || apic_is_disabled) possible = 1; /* 'possible_cpus=N' */ @@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void) static __init bool restrict_to_up(void) { - if (!smp_found_config || ioapic_is_disabled) + if (!smp_found_config) return true; /* * XEN PV is special as it does not advertise the local APIC From a6fa67d26de385c3c7a23c1e109a0e23bfda4ec7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 18:02:17 +0200 Subject: [PATCH 306/366] drm/dp_mst: Fix resetting msg rx state after topology removal If the MST topology is removed during the reception of an MST down reply or MST up request sideband message, the drm_dp_mst_topology_mgr::up_req_recv/down_rep_recv states could be reset from one thread via drm_dp_mst_topology_mgr_set_mst(false), racing with the reading/parsing of the message from another thread via drm_dp_mst_handle_down_rep() or drm_dp_mst_handle_up_req(). The race is possible since the reader/parser doesn't hold any lock while accessing the reception state. This in turn can lead to a memory corruption in the reader/parser as described by commit bd2fccac61b4 ("drm/dp_mst: Fix MST sideband message body length check"). Fix the above by resetting the message reception state if needed before reading/parsing a message. Another solution would be to hold the drm_dp_mst_topology_mgr::lock for the whole duration of the message reception/parsing in drm_dp_mst_handle_down_rep() and drm_dp_mst_handle_up_req(), however this would require a bigger change. Since the fix is also needed for stable, opting for the simpler solution in this patch. Cc: Lyude Paul Cc: Fixes: 1d082618bbf3 ("drm/display/dp_mst: Fix down/up message handling after sink disconnect") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13056 Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-2-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 21 +++++++++++++++++-- include/drm/display/drm_dp_mst_helper.h | 7 +++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index e6ee180815b20..1475aa95ab6b2 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3700,8 +3700,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms ret = 0; mgr->payload_id_table_cleared = false; - memset(&mgr->down_rep_recv, 0, sizeof(mgr->down_rep_recv)); - memset(&mgr->up_req_recv, 0, sizeof(mgr->up_req_recv)); + mgr->reset_rx_state = true; } out_unlock: @@ -3859,6 +3858,11 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume); +static void reset_msg_rx_state(struct drm_dp_sideband_msg_rx *msg) +{ + memset(msg, 0, sizeof(*msg)); +} + static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up, struct drm_dp_mst_branch **mstb) @@ -4141,6 +4145,17 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) return 0; } +static void update_msg_rx_state(struct drm_dp_mst_topology_mgr *mgr) +{ + mutex_lock(&mgr->lock); + if (mgr->reset_rx_state) { + mgr->reset_rx_state = false; + reset_msg_rx_state(&mgr->down_rep_recv); + reset_msg_rx_state(&mgr->up_req_recv); + } + mutex_unlock(&mgr->lock); +} + /** * drm_dp_mst_hpd_irq_handle_event() - MST hotplug IRQ handle MST event * @mgr: manager to notify irq for. @@ -4175,6 +4190,8 @@ int drm_dp_mst_hpd_irq_handle_event(struct drm_dp_mst_topology_mgr *mgr, const u *handled = true; } + update_msg_rx_state(mgr); + if (esi[1] & DP_DOWN_REP_MSG_RDY) { ret = drm_dp_mst_handle_down_rep(mgr); *handled = true; diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index f6a1cbb0f600f..a80ba457a858f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -699,6 +699,13 @@ struct drm_dp_mst_topology_mgr { */ bool payload_id_table_cleared : 1; + /** + * @reset_rx_state: The down request's reply and up request message + * receiver state must be reset, after the topology manager got + * removed. Protected by @lock. + */ + bool reset_rx_state : 1; + /** * @payload_count: The number of currently active payloads in hardware. This value is only * intended to be used internally by MST helpers for payload tracking, and is only safe to From 4d49e77a973d3b5d1881663c3f122906a0702940 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 18:02:18 +0200 Subject: [PATCH 307/366] drm/dp_mst: Verify request type in the corresponding down message reply After receiving the response for an MST down request message, the response should be accepted/parsed only if the response type matches that of the request. Ensure this by checking if the request type code stored both in the request and the reply match, dropping the reply in case of a mismatch. This fixes the topology detection for an MST hub, as described in the Closes link below, where the hub sends an incorrect reply message after a CLEAR_PAYLOAD_TABLE -> LINK_ADDRESS down request message sequence. Cc: Lyude Paul Cc: Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12804 Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-3-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 1475aa95ab6b2..bcf3a33123be1 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3941,6 +3941,34 @@ drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up, return true; } +static int get_msg_request_type(u8 data) +{ + return data & 0x7f; +} + +static bool verify_rx_request_type(struct drm_dp_mst_topology_mgr *mgr, + const struct drm_dp_sideband_msg_tx *txmsg, + const struct drm_dp_sideband_msg_rx *rxmsg) +{ + const struct drm_dp_sideband_msg_hdr *hdr = &rxmsg->initial_hdr; + const struct drm_dp_mst_branch *mstb = txmsg->dst; + int tx_req_type = get_msg_request_type(txmsg->msg[0]); + int rx_req_type = get_msg_request_type(rxmsg->msg[0]); + char rad_str[64]; + + if (tx_req_type == rx_req_type) + return true; + + drm_dp_mst_rad_to_str(mstb->rad, mstb->lct, rad_str, sizeof(rad_str)); + drm_dbg_kms(mgr->dev, + "Got unexpected MST reply, mstb: %p seqno: %d lct: %d rad: %s rx_req_type: %s (%02x) != tx_req_type: %s (%02x)\n", + mstb, hdr->seqno, mstb->lct, rad_str, + drm_dp_mst_req_type_str(rx_req_type), rx_req_type, + drm_dp_mst_req_type_str(tx_req_type), tx_req_type); + + return false; +} + static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_sideband_msg_tx *txmsg; @@ -3970,6 +3998,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) goto out_clear_reply; } + if (!verify_rx_request_type(mgr, txmsg, msg)) + goto out_clear_reply; + drm_dp_sideband_parse_reply(mgr, msg, &txmsg->reply); if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) { From b559b68d2761739b7c2e44d6fa59092b0d03e9ed Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 18:02:19 +0200 Subject: [PATCH 308/366] drm/dp_mst: Simplify error path in drm_dp_mst_handle_down_rep() Simplify the error return path in drm_dp_mst_handle_down_rep(), preparing for the next patch. While at it use reset_msg_rx_state() instead of open-coding it. Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-4-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index bcf3a33123be1..6ec8680998d5a 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4013,9 +4013,6 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) txmsg->reply.u.nak.nak_data); } - memset(msg, 0, sizeof(struct drm_dp_sideband_msg_rx)); - drm_dp_mst_topology_put_mstb(mstb); - mutex_lock(&mgr->qlock); txmsg->state = DRM_DP_SIDEBAND_TX_RX; list_del(&txmsg->next); @@ -4023,10 +4020,8 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) wake_up_all(&mgr->tx_waitq); - return 0; - out_clear_reply: - memset(msg, 0, sizeof(struct drm_dp_sideband_msg_rx)); + reset_msg_rx_state(msg); out: if (mstb) drm_dp_mst_topology_put_mstb(mstb); From 3f611855031f94385c2eeb32b1f99dd7a9fa566b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 19:46:32 +0200 Subject: [PATCH 309/366] drm/dp_mst: Fix down request message timeout handling If receiving a reply for an MST down request message times out, the thread receiving the reply in drm_dp_mst_handle_down_rep() could try to dereference the drm_dp_sideband_msg_tx txmsg request message after the thread waiting for the reply - calling drm_dp_mst_wait_tx_reply() - has timed out and freed txmsg, hence leading to a use-after-free in drm_dp_mst_handle_down_rep(). Prevent the above by holding the drm_dp_mst_topology_mgr::qlock in drm_dp_mst_handle_down_rep() for the whole duration txmsg is looked up from the request list and dereferenced. v2: Fix unlocking mgr->qlock after verify_rx_request_type() fails. Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203174632.2941402-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 6ec8680998d5a..ab21855d5c0f7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3984,9 +3984,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) /* find the message */ mutex_lock(&mgr->qlock); + txmsg = list_first_entry_or_null(&mgr->tx_msg_downq, struct drm_dp_sideband_msg_tx, next); - mutex_unlock(&mgr->qlock); /* Were we actually expecting a response, and from this mstb? */ if (!txmsg || txmsg->dst != mstb) { @@ -3995,11 +3995,17 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) hdr = &msg->initial_hdr; drm_dbg_kms(mgr->dev, "Got MST reply with no msg %p %d %d %02x %02x\n", mstb, hdr->seqno, hdr->lct, hdr->rad[0], msg->msg[0]); + + mutex_unlock(&mgr->qlock); + goto out_clear_reply; } - if (!verify_rx_request_type(mgr, txmsg, msg)) + if (!verify_rx_request_type(mgr, txmsg, msg)) { + mutex_unlock(&mgr->qlock); + goto out_clear_reply; + } drm_dp_sideband_parse_reply(mgr, msg, &txmsg->reply); @@ -4013,9 +4019,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) txmsg->reply.u.nak.nak_data); } - mutex_lock(&mgr->qlock); txmsg->state = DRM_DP_SIDEBAND_TX_RX; list_del(&txmsg->next); + mutex_unlock(&mgr->qlock); wake_up_all(&mgr->tx_waitq); From e54b00086f7473dbda1a7d6fc47720ced157c6a8 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 4 Dec 2024 15:20:07 +0200 Subject: [PATCH 310/366] drm/dp_mst: Ensure mst_primary pointer is valid in drm_dp_mst_handle_up_req() While receiving an MST up request message from one thread in drm_dp_mst_handle_up_req(), the MST topology could be removed from another thread via drm_dp_mst_topology_mgr_set_mst(false), freeing mst_primary and setting drm_dp_mst_topology_mgr::mst_primary to NULL. This could lead to a NULL deref/use-after-free of mst_primary in drm_dp_mst_handle_up_req(). Avoid the above by holding a reference for mst_primary in drm_dp_mst_handle_up_req() while it's used. v2: Fix kfreeing the request if getting an mst_primary reference fails. Cc: Lyude Paul Reviewed-by: Lyude Paul (v1) Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241204132007.3132494-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ab21855d5c0f7..1d1b5f37b0076 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4109,9 +4109,10 @@ static void drm_dp_mst_up_req_work(struct work_struct *work) static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_pending_up_req *up_req; + struct drm_dp_mst_branch *mst_primary; if (!drm_dp_get_one_sb_msg(mgr, true, NULL)) - goto out; + goto out_clear_reply; if (!mgr->up_req_recv.have_eomt) return 0; @@ -4129,10 +4130,19 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) drm_dbg_kms(mgr->dev, "Received unknown up req type, ignoring: %x\n", up_req->msg.req_type); kfree(up_req); - goto out; + goto out_clear_reply; + } + + mutex_lock(&mgr->lock); + mst_primary = mgr->mst_primary; + if (!mst_primary || !drm_dp_mst_topology_try_get_mstb(mst_primary)) { + mutex_unlock(&mgr->lock); + kfree(up_req); + goto out_clear_reply; } + mutex_unlock(&mgr->lock); - drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type, + drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { @@ -4149,13 +4159,13 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) conn_stat->peer_device_type); mutex_lock(&mgr->probe_lock); - handle_csn = mgr->mst_primary->link_address_sent; + handle_csn = mst_primary->link_address_sent; mutex_unlock(&mgr->probe_lock); if (!handle_csn) { drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it."); kfree(up_req); - goto out; + goto out_put_primary; } } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { const struct drm_dp_resource_status_notify *res_stat = @@ -4172,7 +4182,9 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); -out: +out_put_primary: + drm_dp_mst_topology_put_mstb(mst_primary); +out_clear_reply: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; } From 2b245c97b1af5d8f04c359e0826cb5a5c81ef704 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 18:02:22 +0200 Subject: [PATCH 311/366] drm/dp_mst: Reset message rx state after OOM in drm_dp_mst_handle_up_req() After an out-of-memory error the reception state should be reset, so that the next attempt receiving a message doesn't fail (due to getting a start-of-message packet, while the reception state has already the start-of-message flag set). Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-7-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 1d1b5f37b0076..e5713e3be80e3 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4110,6 +4110,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_pending_up_req *up_req; struct drm_dp_mst_branch *mst_primary; + int ret = 0; if (!drm_dp_get_one_sb_msg(mgr, true, NULL)) goto out_clear_reply; @@ -4118,8 +4119,10 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) return 0; up_req = kzalloc(sizeof(*up_req), GFP_KERNEL); - if (!up_req) - return -ENOMEM; + if (!up_req) { + ret = -ENOMEM; + goto out_clear_reply; + } INIT_LIST_HEAD(&up_req->next); @@ -4186,7 +4189,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) drm_dp_mst_topology_put_mstb(mst_primary); out_clear_reply: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); - return 0; + return ret; } static void update_msg_rx_state(struct drm_dp_mst_topology_mgr *mgr) From dc1b157b828dfe412c776ac1dd8db158f6016b39 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Dec 2024 10:04:14 -0500 Subject: [PATCH 312/366] tracing: Fix archs that still call tracepoints without RCU watching Tracepoints require having RCU "watching" as it uses RCU to do updates to the tracepoints. There are some cases that would call a tracepoint when RCU was not "watching". This was usually in the idle path where RCU has "shutdown". For the few locations that had tracepoints without RCU watching, there was an trace_*_rcuidle() variant that could be used. This used SRCU for protection. There are tracepoints that trace when interrupts and preemption are enabled and disabled. In some architectures, these tracepoints are called in a path where RCU is not watching. When x86 and arm64 removed these locations, it was incorrectly assumed that it would be safe to remove the trace_*_rcuidle() variant and also remove the SRCU logic, as it made the code more complex and harder to implement new tracepoint features (like faultable tracepoints and tracepoints in rust). Instead of bringing back the trace_*_rcuidle(), as it will not be trivial to do as new code has already been added depending on its removal, add a workaround to the one file that still requires it (trace_preemptirq.c). If the architecture does not define CONFIG_ARCH_WANTS_NO_INSTR, then check if the code is in the idle path, and if so, call ct_irq_enter/exit() which will enable RCU around the tracepoint. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Mark Rutland Link: https://lore.kernel.org/20241204100414.4d3e06d0@gandalf.local.home Reported-by: Geert Uytterhoeven Fixes: 48bcda684823 ("tracing: Remove definition of trace_*_rcuidle()") Closes: https://lore.kernel.org/all/bddb02de-957a-4df5-8e77-829f55728ea2@roeck-us.net/ Acked-by: Paul E. McKenney Tested-by: Guenter Roeck Tested-by: Geert Uytterhoeven Tested-by: Madhavan Srinivasan Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_preemptirq.c | 43 ++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index 5c03633316a66..0c42b15c38004 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -10,11 +10,42 @@ #include #include #include +#include #include "trace.h" #define CREATE_TRACE_POINTS #include +/* + * Use regular trace points on architectures that implement noinstr + * tooling: these calls will only happen with RCU enabled, which can + * use a regular tracepoint. + * + * On older architectures, RCU may not be watching in idle. In that + * case, wake up RCU to watch while calling the tracepoint. These + * aren't NMI-safe - so exclude NMI contexts: + */ +#ifdef CONFIG_ARCH_WANTS_NO_INSTR +#define trace(point, args) trace_##point(args) +#else +#define trace(point, args) \ + do { \ + if (trace_##point##_enabled()) { \ + bool exit_rcu = false; \ + if (in_nmi()) \ + break; \ + if (!IS_ENABLED(CONFIG_TINY_RCU) && \ + is_idle_task(current)) { \ + ct_irq_enter(); \ + exit_rcu = true; \ + } \ + trace_##point(args); \ + if (exit_rcu) \ + ct_irq_exit(); \ + } \ + } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS /* Per-cpu variable to prevent redundant calls when IRQs already off */ static DEFINE_PER_CPU(int, tracing_irq_cpu); @@ -28,7 +59,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu); void trace_hardirqs_on_prepare(void) { if (this_cpu_read(tracing_irq_cpu)) { - trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1)); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -39,7 +70,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare); void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { - trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1)); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -61,7 +92,7 @@ void trace_hardirqs_off_finish(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1)); } } @@ -75,7 +106,7 @@ void trace_hardirqs_off(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1)); } } EXPORT_SYMBOL(trace_hardirqs_off); @@ -86,13 +117,13 @@ NOKPROBE_SYMBOL(trace_hardirqs_off); void trace_preempt_on(unsigned long a0, unsigned long a1) { - trace_preempt_enable(a0, a1); + trace(preempt_enable, TP_ARGS(a0, a1)); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { - trace_preempt_disable(a0, a1); + trace(preempt_disable, TP_ARGS(a0, a1)); tracer_preempt_off(a0, a1); } #endif From 59ca0e1c97c5d752f85ca2922ff258ff5e62bc73 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Dec 2024 18:02:23 +0200 Subject: [PATCH 313/366] drm/dp_mst: Use reset_msg_rx_state() instead of open coding it Use reset_msg_rx_state() in drm_dp_mst_handle_up_req() instead of open-coding it. Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-8-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index e5713e3be80e3..dc4446d589e77 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4188,7 +4188,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) out_put_primary: drm_dp_mst_topology_put_mstb(mst_primary); out_clear_reply: - memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + reset_msg_rx_state(&mgr->up_req_recv); return ret; } From 16d5306629c06ec2653c12794f344168a0eea809 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 5 Dec 2024 14:36:34 +0000 Subject: [PATCH 314/366] coco: virt: arm64: Do not enable cca guest driver by default As per the guidelines, new drivers may not be set to default on. An expert user can always select it. Reported-by: Dan Williams Cc: Will Deacon Cc: Steven Price Cc: Sami Mujawar Link: https://lore.kernel.org/r/6750c695194cd_2508129427@dwillia2-xfh.jf.intel.com.notmuch Link: https://lore.kernel.org/r/20241205143634.306114-1-suzuki.poulose@arm.com Signed-off-by: Suzuki K Poulose Reviewed-by: Steven Price Signed-off-by: Catalin Marinas --- drivers/virt/coco/arm-cca-guest/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig index 9dd27c3ee2152..3f0f013f03f15 100644 --- a/drivers/virt/coco/arm-cca-guest/Kconfig +++ b/drivers/virt/coco/arm-cca-guest/Kconfig @@ -1,7 +1,6 @@ config ARM_CCA_GUEST tristate "Arm CCA Guest driver" depends on ARM64 - default m select TSM_REPORTS help The driver provides userspace interface to request and From 76031d9536a076bf023bedbdb1b4317fc801dd67 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Dec 2024 11:16:30 +0100 Subject: [PATCH 315/366] clocksource: Make negative motion detection more robust Guenter reported boot stalls on a emulated ARM 32-bit platform, which has a 24-bit wide clocksource. It turns out that the calculated maximal idle time, which limits idle sleeps to prevent clocksource wrap arounds, is close to the point where the negative motion detection triggers. max_idle_ns: 597268854 ns negative motion tripping point: 671088640 ns If the idle wakeup is delayed beyond that point, the clocksource advances far enough to trigger the negative motion detection. This prevents the clock to advance and in the worst case the system stalls completely if the consecutive sleeps based on the stale clock are delayed as well. Cure this by calculating a more robust cut-off value for negative motion, which covers 87.5% of the actual clocksource counter width. Compare the delta against this value to catch negative motion. This is specifically for clock sources with a small counter width as their wrap around time is close to the half counter width. For clock sources with wide counters this is not a problem because the maximum idle time is far from the half counter width due to the math overflow protection constraints. For the case at hand this results in a tripping point of 1174405120ns. Note, that this cannot prevent issues when the delay exceeds the 87.5% margin, but that's not different from the previous unchecked version which allowed arbitrary time jumps. Systems with small counter width are prone to invalid results, but this problem is unlikely to be seen on real hardware. If such a system completely stalls for more than half a second, then there are other more urgent problems than the counter wrapping around. Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net --- include/linux/clocksource.h | 2 ++ kernel/time/clocksource.c | 11 ++++++++++- kernel/time/timekeeping.c | 6 ++++-- kernel/time/timekeeping_internal.h | 8 ++++---- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index ef1b16da6ad52..65b7c41471c39 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -49,6 +49,7 @@ struct module; * @archdata: Optional arch-specific data * @max_cycles: Maximum safe cycle value which won't overflow on * multiplication + * @max_raw_delta: Maximum safe delta value for negative motion detection * @name: Pointer to clocksource name * @list: List head for registration (internal) * @freq_khz: Clocksource frequency in khz. @@ -109,6 +110,7 @@ struct clocksource { struct arch_clocksource_data archdata; #endif u64 max_cycles; + u64 max_raw_delta; const char *name; struct list_head list; u32 freq_khz; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index aab6472853fab..7304d7cf47f2d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -24,7 +24,7 @@ static void clocksource_enqueue(struct clocksource *cs); static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) { - u64 delta = clocksource_delta(end, start, cs->mask); + u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta); if (likely(delta < cs->max_cycles)) return clocksource_cyc2ns(delta, cs->mult, cs->shift); @@ -993,6 +993,15 @@ static inline void clocksource_update_max_deferment(struct clocksource *cs) cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, cs->mask, &cs->max_cycles); + + /* + * Threshold for detecting negative motion in clocksource_delta(). + * + * Allow for 0.875 of the counter width so that overly long idle + * sleeps, which go slightly over mask/2, do not trigger the + * negative motion detection. + */ + cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3); } static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0ca85ff4fbb4a..3d128825d3437 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -755,7 +755,8 @@ static void timekeeping_forward_now(struct timekeeper *tk) u64 cycle_now, delta; cycle_now = tk_clock_read(&tk->tkr_mono); - delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, + tk->tkr_mono.clock->max_raw_delta); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; @@ -2230,7 +2231,8 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) return false; offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), - tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + tk->tkr_mono.cycle_last, tk->tkr_mono.mask, + tk->tkr_mono.clock->max_raw_delta); /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 63e600e943a76..8c9079108ffb2 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -30,15 +30,15 @@ static inline void timekeeping_inc_mg_floor_swaps(void) #endif -static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) +static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) { u64 ret = (now - last) & mask; /* - * Prevent time going backwards by checking the MSB of mask in - * the result. If set, return 0. + * Prevent time going backwards by checking the result against + * @max_delta. If greater, return 0. */ - return ret & ~(mask >> 1) ? 0 : ret; + return ret > max_delta ? 0 : ret; } /* Semi public for serialization of non timekeeper VDSO updates. */ From 7678abee0867e6b7fb89aa40f6e9f575f755fb37 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 12 Nov 2024 20:58:21 +0800 Subject: [PATCH 316/366] virtio-blk: don't keep queue frozen during system suspend Commit 4ce6e2db00de ("virtio-blk: Ensure no requests in virtqueues before deleting vqs.") replaces queue quiesce with queue freeze in virtio-blk's PM callbacks. And the motivation is to drain inflight IOs before suspending. block layer's queue freeze looks very handy, but it is also easy to cause deadlock, such as, any attempt to call into bio_queue_enter() may run into deadlock if the queue is frozen in current context. There are all kinds of ->suspend() called in suspend context, so keeping queue frozen in the whole suspend context isn't one good idea. And Marek reported lockdep warning[1] caused by virtio-blk's freeze queue in virtblk_freeze(). [1] https://lore.kernel.org/linux-block/ca16370e-d646-4eee-b9cc-87277c89c43c@samsung.com/ Given the motivation is to drain in-flight IOs, it can be done by calling freeze & unfreeze, meantime restore to previous behavior by keeping queue quiesced during suspend. Cc: Yi Sun Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Stefan Hajnoczi Cc: virtualization@lists.linux.dev Reported-by: Marek Szyprowski Signed-off-by: Ming Lei Acked-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20241112125821.1475793-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c0cdba71f4364..3efe378f13866 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1586,9 +1586,12 @@ static void virtblk_remove(struct virtio_device *vdev) static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + struct request_queue *q = vblk->disk->queue; /* Ensure no requests in virtqueues before deleting vqs. */ - blk_mq_freeze_queue(vblk->disk->queue); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue_nowait(q); + blk_mq_unfreeze_queue(q); /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); @@ -1612,8 +1615,8 @@ static int virtblk_restore(struct virtio_device *vdev) return ret; virtio_device_ready(vdev); + blk_mq_unquiesce_queue(vblk->disk->queue); - blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif From f00b53f1614f7be554fd28b9594ef4e63e2686c5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 13:48:10 +0000 Subject: [PATCH 317/366] arm64: cpufeature: Add GCS to cpucap_is_possible() Since system_supports_gcs() ends up referring to cpucap_is_possible(), teach the latter about GCS for consistency with similar features. Signed-off-by: Robin Murphy Acked-by: Mark Rutland Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/416c7369fcdce4ebb2a8f12daae234507be27e38.1733406275.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 2 ++ arch/arm64/include/asm/cpufeature.h | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 201a46efd9188..cbbf70e0f2048 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,6 +44,8 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); case ARM64_HAS_S1POE: return IS_ENABLED(CONFIG_ARM64_POE); + case ARM64_HAS_GCS: + return IS_ENABLED(CONFIG_ARM64_GCS); case ARM64_UNMAP_KERNEL_AT_EL0: return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); case ARM64_WORKAROUND_843419: diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b64e49bd9d106..8b4e5a3cd24c8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -847,8 +847,7 @@ static inline bool system_supports_poe(void) static inline bool system_supports_gcs(void) { - return IS_ENABLED(CONFIG_ARM64_GCS) && - alternative_has_cap_unlikely(ARM64_HAS_GCS); + return alternative_has_cap_unlikely(ARM64_HAS_GCS); } static inline bool system_supports_haft(void) From c98b10496b2f3c4f576af3482c71aadcfcbf765e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 4 Dec 2024 09:28:31 -0300 Subject: [PATCH 318/366] drm/v3d: Enable Performance Counters before clearing them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the Raspberry Pi 5, performance counters are not being cleared when `v3d_perfmon_start()` is called, even though we write to the CLR register. As a result, their values accumulate until they overflow. The expected behavior is for performance counters to reset to zero at the start of a job. When the job finishes and the perfmon is stopped, the counters should accurately reflect the values for that specific job. To ensure this behavior, the performance counters are now enabled before being cleared. This allows the CLR register to function as intended, zeroing the counter values when the job begins. Fixes: 26a4dc29b74a ("drm/v3d: Expose performance counters to userspace") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20241204122831.17015-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_perfmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 00cd081d78732..6ee56cbd3f1bf 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -254,9 +254,9 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, mask); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_CLR, mask); V3D_CORE_WRITE(0, V3D_PCTR_0_OVERFLOW, mask); - V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, mask); v3d->active_perfmon = perfmon; } From ca62d90085f4af36de745883faab9f8a7cbb45d3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:52 +0000 Subject: [PATCH 319/366] arm64: ptrace: fix partial SETREGSET for NT_ARM_TAGGED_ADDR_CTRL Currently tagged_addr_ctrl_set() doesn't initialize the temporary 'ctrl' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently tagged_addr_ctrl_set() will consume an arbitrary value, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. As set_tagged_addr_ctrl() only accepts values where bits [63:4] zero and rejects other values, a partial SETREGSET attempt will randomly succeed or fail depending on the value of the uninitialized value, and the exposure is significantly limited. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing value of the tagged address ctrl will be retained. The NT_ARM_TAGGED_ADDR_CTRL regset is only visible in the user_aarch64_view used by a native AArch64 task to manipulate another native AArch64 task. As get_tagged_addr_ctrl() only returns an error value when called for a compat task, tagged_addr_ctrl_get() and tagged_addr_ctrl_set() should never observe an error value from get_tagged_addr_ctrl(). Add a WARN_ON_ONCE() to both to indicate that such an error would be unexpected, and error handlnig is not missing in either case. Fixes: 2200aa7154cb ("arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset") Cc: # 5.10.x Signed-off-by: Mark Rutland Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e4437f62a2cda..8525d03f0fb40 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1427,7 +1427,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1441,6 +1441,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; From f5d71291841aecfe5d8435da2dfa7f58ccd18bc8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:53 +0000 Subject: [PATCH 320/366] arm64: ptrace: fix partial SETREGSET for NT_ARM_FPMR Currently fpmr_set() doesn't initialize the temporary 'fpmr' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently an arbitrary value will be written back to target->thread.uw.fpmr, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing contents of FPMR will be retained. Before this patch: | # ./fpmr-test | Attempting to write NT_ARM_FPMR::fpmr = 0x900d900d900d900d | SETREGSET(nt=0x40e, len=8) wrote 8 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d | | Attempting to write NT_ARM_FPMR (zero length) | SETREGSET(nt=0x40e, len=0) wrote 0 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0xffff800083963d50 After this patch: | # ./fpmr-test | Attempting to write NT_ARM_FPMR::fpmr = 0x900d900d900d900d | SETREGSET(nt=0x40e, len=8) wrote 8 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d | | Attempting to write NT_ARM_FPMR (zero length) | SETREGSET(nt=0x40e, len=0) wrote 0 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d Fixes: 4035c22ef7d4 ("arm64/ptrace: Expose FPMR via ptrace") Cc: # 6.9.x Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8525d03f0fb40..85c862dc845b3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -720,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset if (!system_supports_fpmr()) return -EINVAL; + fpmr = target->thread.uw.fpmr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count); if (ret) return ret; From 594bfc4947c4fcabba1318d8384c61a29a6b89fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:54 +0000 Subject: [PATCH 321/366] arm64: ptrace: fix partial SETREGSET for NT_ARM_POE Currently poe_set() doesn't initialize the temporary 'ctrl' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently an arbitrary value will be written back to target->thread.por_el0, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing contents of POR_EL1 will be retained. Before this patch: | # ./poe-test | Attempting to write NT_ARM_POE::por_el0 = 0x900d900d900d900d | SETREGSET(nt=0x40f, len=8) wrote 8 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d | | Attempting to write NT_ARM_POE (zero length) | SETREGSET(nt=0x40f, len=0) wrote 0 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0xffff8000839c3d50 After this patch: | # ./poe-test | Attempting to write NT_ARM_POE::por_el0 = 0x900d900d900d900d | SETREGSET(nt=0x40f, len=8) wrote 8 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d | | Attempting to write NT_ARM_POE (zero length) | SETREGSET(nt=0x40f, len=0) wrote 0 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d Fixes: 175198199262 ("arm64/ptrace: add support for FEAT_POE") Cc: # 6.12.x Signed-off-by: Mark Rutland Cc: Joey Gouly Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 85c862dc845b3..f17810030fa05 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1478,6 +1478,8 @@ static int poe_set(struct task_struct *target, const struct if (!system_supports_poe()) return -EINVAL; + ctrl = target->thread.por_el0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; From d60624f72d15862a96965b945f6ddfee9a1359e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:55 +0000 Subject: [PATCH 322/366] arm64: ptrace: fix partial SETREGSET for NT_ARM_GCS Currently gcs_set() doesn't initialize the temporary 'user_gcs' variable, and a SETREGSET call with a length of 0, 8, or 16 will leave some portion of this uninitialized. Consequently some arbitrary uninitialized values may be written back to the relevant fields in task struct, potentially leaking up to 192 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. As gcs_set() rejects cases where user_gcs::features_enabled has bits set other than PR_SHADOW_STACK_SUPPORTED_STATUS_MASK, a SETREGSET call with a length of zero will randomly succeed or fail depending on the value of the uninitialized value, it isn't possible to leak the full 192 bits. With a length of 8 or 16, user_gcs::features_enabled can be initialized to an accepted value, making it practical to leak 128 or 64 bits. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length or partial write, the existing contents of the fields which are not written to will be retained. To ensure that the extraction and insertion of fields is consistent across the GETREGSET and SETREGSET calls, new task_gcs_to_user() and task_gcs_from_user() helpers are added, matching the style of pac_address_keys_to_user() and pac_address_keys_from_user(). Before this patch: | # ./gcs-test | Attempting to write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | SETREGSET(nt=0x410, len=24) wrote 24 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | | Attempting partial write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x1de7ec7edbadc0de, | .gcspr_el0 = 0x1de7ec7edbadc0de, | } | SETREGSET(nt=0x410, len=8) wrote 8 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x000000000093e780, | .gcspr_el0 = 0xffff800083a63d50, | } After this patch: | # ./gcs-test | Attempting to write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | SETREGSET(nt=0x410, len=24) wrote 24 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | | Attempting partial write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x1de7ec7edbadc0de, | .gcspr_el0 = 0x1de7ec7edbadc0de, | } | SETREGSET(nt=0x410, len=8) wrote 8 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } Fixes: 7ec3b57cb29f ("arm64/ptrace: Expose GCS via ptrace and core files") Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f17810030fa05..f79b0d5f71ac9 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1491,6 +1491,22 @@ static int poe_set(struct task_struct *target, const struct #endif #ifdef CONFIG_ARM64_GCS +static void task_gcs_to_user(struct user_gcs *user_gcs, + const struct task_struct *target) +{ + user_gcs->features_enabled = target->thread.gcs_el0_mode; + user_gcs->features_locked = target->thread.gcs_el0_locked; + user_gcs->gcspr_el0 = target->thread.gcspr_el0; +} + +static void task_gcs_from_user(struct task_struct *target, + const struct user_gcs *user_gcs) +{ + target->thread.gcs_el0_mode = user_gcs->features_enabled; + target->thread.gcs_el0_locked = user_gcs->features_locked; + target->thread.gcspr_el0 = user_gcs->gcspr_el0; +} + static int gcs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -1503,9 +1519,7 @@ static int gcs_get(struct task_struct *target, if (target == current) gcs_preserve_current_state(); - user_gcs.features_enabled = target->thread.gcs_el0_mode; - user_gcs.features_locked = target->thread.gcs_el0_locked; - user_gcs.gcspr_el0 = target->thread.gcspr_el0; + task_gcs_to_user(&user_gcs, target); return membuf_write(&to, &user_gcs, sizeof(user_gcs)); } @@ -1521,6 +1535,8 @@ static int gcs_set(struct task_struct *target, const struct if (!system_supports_gcs()) return -EINVAL; + task_gcs_to_user(&user_gcs, target); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); if (ret) return ret; @@ -1528,9 +1544,7 @@ static int gcs_set(struct task_struct *target, const struct if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) return -EINVAL; - target->thread.gcs_el0_mode = user_gcs.features_enabled; - target->thread.gcs_el0_locked = user_gcs.features_locked; - target->thread.gcspr_el0 = user_gcs.gcspr_el0; + task_gcs_from_user(target, &user_gcs); return 0; } From def13795928b82ecca4ae3ea16f375114ff88685 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2024 15:44:21 -0800 Subject: [PATCH 323/366] fs/proc/vmcore.c: fix warning when CONFIG_MMU=n >> fs/proc/vmcore.c:424:19: warning: 'mmap_vmcore_fault' defined but not used [-Wunused-function] 424 | static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411140156.2o0nS4fl-lkp@intel.com/ Cc: Qi Xi Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 56 ++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b4521b0960588..3d8a82cee63e3 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -414,6 +414,34 @@ static ssize_t read_vmcore(struct kiocb *iocb, struct iov_iter *iter) return __read_vmcore(iter, &iocb->ki_pos); } +/** + * vmcore_alloc_buf - allocate buffer in vmalloc memory + * @size: size of buffer + * + * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap + * the buffer to user-space by means of remap_vmalloc_range(). + * + * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is + * disabled and there's no need to allow users to mmap the buffer. + */ +static inline char *vmcore_alloc_buf(size_t size) +{ +#ifdef CONFIG_MMU + return vmalloc_user(size); +#else + return vzalloc(size); +#endif +} + +/* + * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is + * essential for mmap_vmcore() in order to map physically + * non-contiguous objects (ELF header, ELF note segment and memory + * regions in the 1st kernel pointed to by PT_LOAD entries) into + * virtually contiguous user-space in ELF layout. + */ +#ifdef CONFIG_MMU + /* * The vmcore fault handler uses the page cache and fills data using the * standard __read_vmcore() function. @@ -457,34 +485,6 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) #endif } -/** - * vmcore_alloc_buf - allocate buffer in vmalloc memory - * @size: size of buffer - * - * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap - * the buffer to user-space by means of remap_vmalloc_range(). - * - * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is - * disabled and there's no need to allow users to mmap the buffer. - */ -static inline char *vmcore_alloc_buf(size_t size) -{ -#ifdef CONFIG_MMU - return vmalloc_user(size); -#else - return vzalloc(size); -#endif -} - -/* - * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is - * essential for mmap_vmcore() in order to map physically - * non-contiguous objects (ELF header, ELF note segment and memory - * regions in the 1st kernel pointed to by PT_LOAD entries) into - * virtually contiguous user-space in ELF layout. - */ -#ifdef CONFIG_MMU - static const struct vm_operations_struct vmcore_mmap_ops = { .fault = mmap_vmcore_fault, }; From a1268be280d8e484ab3606d7476edd0f14bb9961 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 20 Nov 2024 19:49:33 -0800 Subject: [PATCH 324/366] mm/gup: handle NULL pages in unpin_user_pages() The recent addition of "pofs" (pages or folios) handling to gup has a flaw: it assumes that unpin_user_pages() handles NULL pages in the pages** array. That's not the case, as I discovered when I ran on a new configuration on my test machine. Fix this by skipping NULL pages in unpin_user_pages(), just like unpin_folios() already does. Details: when booting on x86 with "numa=fake=2 movablecore=4G" on Linux 6.12, and running this: tools/testing/selftests/mm/gup_longterm ...I get the following crash: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:sanity_check_pinned_pages+0x3a/0x2d0 ... Call Trace: ? __die_body+0x66/0xb0 ? page_fault_oops+0x30c/0x3b0 ? do_user_addr_fault+0x6c3/0x720 ? irqentry_enter+0x34/0x60 ? exc_page_fault+0x68/0x100 ? asm_exc_page_fault+0x22/0x30 ? sanity_check_pinned_pages+0x3a/0x2d0 unpin_user_pages+0x24/0xe0 check_and_migrate_movable_pages_or_folios+0x455/0x4b0 __gup_longterm_locked+0x3bf/0x820 ? mmap_read_lock_killable+0x12/0x50 ? __pfx_mmap_read_lock_killable+0x10/0x10 pin_user_pages+0x66/0xa0 gup_test_ioctl+0x358/0xb20 __se_sys_ioctl+0x6b/0xc0 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e Link: https://lkml.kernel.org/r/20241121034933.77502-1-jhubbard@nvidia.com Fixes: 94efde1d1539 ("mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases") Signed-off-by: John Hubbard Acked-by: David Hildenbrand Cc: Oscar Salvador Cc: Vivek Kasireddy Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Peter Xu Cc: Arnd Bergmann Cc: Daniel Vetter Cc: Dongwon Kim Cc: Hugh Dickins Cc: Junxiao Chang Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 746070a1d8bfb..3b75e631f3691 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -52,7 +52,12 @@ static inline void sanity_check_pinned_pages(struct page **pages, */ for (; npages; npages--, pages++) { struct page *page = *pages; - struct folio *folio = page_folio(page); + struct folio *folio; + + if (!page) + continue; + + folio = page_folio(page); if (is_zero_page(page) || !folio_test_anon(folio)) @@ -409,6 +414,10 @@ void unpin_user_pages(struct page **pages, unsigned long npages) sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { + if (!pages[i]) { + nr = 1; + continue; + } folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } From 091c1dd2d4df6edd1beebe0e5863d4034ade9572 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 20 Nov 2024 21:11:51 +0100 Subject: [PATCH 325/366] mm/mempolicy: fix migrate_to_node() assuming there is at least one VMA in a MM We currently assume that there is at least one VMA in a MM, which isn't true. So we might end up having find_vma() return NULL, to then de-reference NULL. So properly handle find_vma() returning NULL. This fixes the report: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 6021 Comm: syz-executor284 Not tainted 6.12.0-rc7-syzkaller-00187-gf868cd251776 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 RIP: 0010:migrate_to_node mm/mempolicy.c:1090 [inline] RIP: 0010:do_migrate_pages+0x403/0x6f0 mm/mempolicy.c:1194 Code: ... RSP: 0018:ffffc9000375fd08 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffc9000375fd78 RCX: 0000000000000000 RDX: ffff88807e171300 RSI: dffffc0000000000 RDI: ffff88803390c044 RBP: ffff88807e171428 R08: 0000000000000014 R09: fffffbfff2039ef1 R10: ffffffff901cf78f R11: 0000000000000000 R12: 0000000000000003 R13: ffffc9000375fe90 R14: ffffc9000375fe98 R15: ffffc9000375fdf8 FS: 00005555919e1380(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005555919e1ca8 CR3: 000000007f12a000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: kernel_migrate_pages+0x5b2/0x750 mm/mempolicy.c:1709 __do_sys_migrate_pages mm/mempolicy.c:1727 [inline] __se_sys_migrate_pages mm/mempolicy.c:1723 [inline] __x64_sys_migrate_pages+0x96/0x100 mm/mempolicy.c:1723 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f [akpm@linux-foundation.org: add unlikely()] Link: https://lkml.kernel.org/r/20241120201151.9518-1-david@redhat.com Fixes: 39743889aaf7 ("[PATCH] Swap Migration V5: sys_migrate_pages interface") Signed-off-by: David Hildenbrand Reported-by: syzbot+3511625422f7aa637f0d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/673d2696.050a0220.3c9d61.012f.GAE@google.com/T/ Reviewed-by: Liam R. Howlett Reviewed-by: Christoph Lameter Cc: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- mm/mempolicy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bb37cd1a51d87..04f35659717ae 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1080,6 +1080,10 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest, mmap_read_lock(mm); vma = find_vma(mm, 0); + if (unlikely(!vma)) { + mmap_read_unlock(mm); + return 0; + } /* * This does not migrate the range, but isolates all pages that From e30a0361b8515d424c73c67de1a43e45a13b8ba2 Mon Sep 17 00:00:00 2001 From: Jared Kangas Date: Tue, 19 Nov 2024 13:02:34 -0800 Subject: [PATCH 326/366] kasan: make report_lock a raw spinlock If PREEMPT_RT is enabled, report_lock is a sleeping spinlock and must not be locked when IRQs are disabled. However, KASAN reports may be triggered in such contexts. For example: char *s = kzalloc(1, GFP_KERNEL); kfree(s); local_irq_disable(); char c = *s; /* KASAN report here leads to spin_lock() */ local_irq_enable(); Make report_spinlock a raw spinlock to prevent rescheduling when PREEMPT_RT is enabled. Link: https://lkml.kernel.org/r/20241119210234.1602529-1-jkangas@redhat.com Fixes: 342a93247e08 ("locking/spinlock: Provide RT variant header: ") Signed-off-by: Jared Kangas Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- mm/kasan/report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 50fb19ad43881..3fe77a360f1c5 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -201,7 +201,7 @@ static inline void fail_non_kasan_kunit_test(void) { } #endif /* CONFIG_KUNIT */ -static DEFINE_SPINLOCK(report_lock); +static DEFINE_RAW_SPINLOCK(report_lock); static void start_report(unsigned long *flags, bool sync) { @@ -212,7 +212,7 @@ static void start_report(unsigned long *flags, bool sync) lockdep_off(); /* Make sure we don't end up in loop. */ report_suppress_start(); - spin_lock_irqsave(&report_lock, *flags); + raw_spin_lock_irqsave(&report_lock, *flags); pr_err("==================================================================\n"); } @@ -222,7 +222,7 @@ static void end_report(unsigned long *flags, const void *addr, bool is_write) trace_error_report_end(ERROR_DETECTOR_KASAN, (unsigned long)addr); pr_err("==================================================================\n"); - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) check_panic_on_warn("KASAN"); switch (kasan_arg_fault) { From 985ebec4ab0a28bb5910c3b1481a40fbf7f9e61d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 20 Nov 2024 02:23:37 +0900 Subject: [PATCH 327/366] nilfs2: fix potential out-of-bounds memory access in nilfs_find_entry() Syzbot reported that when searching for records in a directory where the inode's i_size is corrupted and has a large value, memory access outside the folio/page range may occur, or a use-after-free bug may be detected if KASAN is enabled. This is because nilfs_last_byte(), which is called by nilfs_find_entry() and others to calculate the number of valid bytes of directory data in a page from i_size and the page index, loses the upper 32 bits of the 64-bit size information due to an inappropriate type of local variable to which the i_size value is assigned. This caused a large byte offset value due to underflow in the end address calculation in the calling nilfs_find_entry(), resulting in memory access that exceeds the folio/page size. Fix this issue by changing the type of the local variable causing the bit loss from "unsigned int" to "u64". The return value of nilfs_last_byte() is also of type "unsigned int", but it is truncated so as not to exceed PAGE_SIZE and no bit loss occurs, so no change is required. Link: https://lkml.kernel.org/r/20241119172403.9292-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+96d5d14c47d97015c624@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=96d5d14c47d97015c624 Tested-by: syzbot+96d5d14c47d97015c624@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 14e8d82f86298..0a3aea6c416bc 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -70,7 +70,7 @@ static inline unsigned int nilfs_chunk_size(struct inode *inode) */ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) { - unsigned int last_byte = inode->i_size; + u64 last_byte = inode->i_size; last_byte -= page_nr << PAGE_SHIFT; if (last_byte > PAGE_SIZE) From 965b5dd1894f4525f38c1b5f99b0106a07dbb5db Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 23 Nov 2024 22:28:34 +0900 Subject: [PATCH 328/366] ocfs2: free inode when ocfs2_get_init_inode() fails syzbot is reporting busy inodes after unmount, for commit 9c89fe0af826 ("ocfs2: Handle error from dquot_initialize()") forgot to call iput() when new_inode() succeeded and dquot_initialize() failed. Link: https://lkml.kernel.org/r/e68c0224-b7c6-4784-b4fa-a9fc8c675525@I-love.SAKURA.ne.jp Fixes: 9c89fe0af826 ("ocfs2: Handle error from dquot_initialize()") Signed-off-by: Tetsuo Handa Reported-by: syzbot+0af00f6a2cba2058b5db@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0af00f6a2cba2058b5db Tested-by: syzbot+0af00f6a2cba2058b5db@syzkaller.appspotmail.com Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 59c92353151a8..5550f8afa4380 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -200,8 +200,10 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode) mode = mode_strip_sgid(&nop_mnt_idmap, dir, mode); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); status = dquot_initialize(inode); - if (status) + if (status) { + iput(inode); return ERR_PTR(status); + } return inode; } From 4ae132c693896b0713db572676c90ffd855a4246 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Nov 2024 16:14:22 +0000 Subject: [PATCH 329/366] selftest: hugetlb_dio: fix test naming The string logged when a test passes or fails is used by the selftest framework to identify which test is being reported. The hugetlb_dio test not only uses the same strings for every test that is run but it also uses different strings for test passes and failures which means that test automation is unable to follow what the test is doing at all. Pull the existing duplicated logging of the number of free huge pages before and after the test out of the conditional and replace that and the logging of the result with a single ksft_print_result() which incorporates the parameters passed into the test into the output. Link: https://lkml.kernel.org/r/20241127-kselftest-mm-hugetlb-dio-names-v1-1-22aab01bf550@kernel.org Fixes: fae1980347bf ("selftests: hugetlb_dio: fixup check for initial conditions to skip in the start") Signed-off-by: Mark Brown Reviewed-by: Muhammad Usama Anjum Cc: Donet Tom Cc: Ritesh Harjani (IBM) Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/hugetlb_dio.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c index 432d5af15e66b..db63abe5ee5e8 100644 --- a/tools/testing/selftests/mm/hugetlb_dio.c +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -76,19 +76,15 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) /* Get the free huge pages after unmap*/ free_hpage_a = get_free_hugepages(); + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + /* * If the no. of free hugepages before allocation and after unmap does * not match - that means there could still be a page which is pinned. */ - if (free_hpage_a != free_hpage_b) { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_fail(": Huge pages not freed!\n"); - } else { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_pass(": Huge pages freed successfully !\n"); - } + ksft_test_result(free_hpage_a == free_hpage_b, + "free huge pages from %u-%u\n", start_off, end_off); } int main(void) From 4a475c0a7eeb3368eca40fe7cb02d157eeddc77a Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 27 Nov 2024 12:08:53 +0000 Subject: [PATCH 330/366] selftests/damon: add _damon_sysfs.py to TEST_FILES When running selftests I encountered the following error message with some damon tests: # Traceback (most recent call last): # File "[...]/damon/./damos_quota.py", line 7, in # import _damon_sysfs # ModuleNotFoundError: No module named '_damon_sysfs' Fix this by adding the _damon_sysfs.py file to TEST_FILES so that it will be available when running the respective damon selftests. Link: https://lkml.kernel.org/r/20241127-picks-visitor-7416685b-mheyne@amazon.de Fixes: 306abb63a8ca ("selftests/damon: implement a python module for test-purpose DAMON sysfs controls") Signed-off-by: Maximilian Heyne Reviewed-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b2a6a5dd1af7..812f656260fba 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,7 +6,7 @@ TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race TEST_GEN_FILES += debugfs_target_ids_pid_leak TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _debugfs_common.sh +TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py # functionality tests TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh From a220d6b95b1ae12c7626283d7609f0a1438e6437 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Nov 2024 15:52:08 +0100 Subject: [PATCH 331/366] Revert "readahead: properly shorten readahead when falling back to do_page_cache_ra()" This reverts commit 7c877586da3178974a8a94577b6045a48377ff25. Anders and Philippe have reported that recent kernels occasionally hang when used with NFS in readahead code. The problem has been bisected to 7c877586da3 ("readahead: properly shorten readahead when falling back to do_page_cache_ra()"). The cause of the problem is that ra->size can be shrunk by read_pages() call and subsequently we end up calling do_page_cache_ra() with negative (read huge positive) number of pages. Let's revert 7c877586da3 for now until we can find a proper way how the logic in read_pages() and page_cache_ra_order() can coexist. This can lead to reduced readahead throughput due to readahead window confusion but that's better than outright hangs. Link: https://lkml.kernel.org/r/20241126145208.985-1-jack@suse.cz Fixes: 7c877586da31 ("readahead: properly shorten readahead when falling back to do_page_cache_ra()") Reported-by: Anders Blomdell Reported-by: Philippe Troin Signed-off-by: Jan Kara Tested-by: Philippe Troin Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- mm/readahead.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 8f1cf599b5722..ea650b8b02fb1 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -458,8 +458,7 @@ void page_cache_ra_order(struct readahead_control *ractl, struct file_ra_state *ra, unsigned int new_order) { struct address_space *mapping = ractl->mapping; - pgoff_t start = readahead_index(ractl); - pgoff_t index = start; + pgoff_t index = readahead_index(ractl); unsigned int min_order = mapping_min_folio_order(mapping); pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT; pgoff_t mark = index + ra->size - ra->async_size; @@ -522,7 +521,7 @@ void page_cache_ra_order(struct readahead_control *ractl, if (!err) return; fallback: - do_page_cache_ra(ractl, ra->size - (index - start), ra->async_size); + do_page_cache_ra(ractl, ra->size, ra->async_size); } static unsigned long ractl_max_pages(struct readahead_control *ractl, From d699440f58ce9bd71103cc7b692e3ab76a20bfcd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Nov 2024 16:52:06 -0800 Subject: [PATCH 332/366] mm: fix vrealloc()'s KASAN poisoning logic When vrealloc() reuses already allocated vmap_area, we need to re-annotate poisoned and unpoisoned portions of underlying memory according to the new size. This results in a KASAN splat recorded at [1]. A KASAN mis-reporting issue where there is none. Note, hard-coding KASAN_VMALLOC_PROT_NORMAL might not be exactly correct, but KASAN flag logic is pretty involved and spread out throughout __vmalloc_node_range_noprof(), so I'm using the bare minimum flag here and leaving the rest to mm people to refactor this logic and reuse it here. Link: https://lkml.kernel.org/r/20241126005206.3457974-1-andrii@kernel.org Link: https://lore.kernel.org/bpf/67450f9b.050a0220.21d33d.0004.GAE@google.com/ [1] Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()") Signed-off-by: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Christoph Hellwig Cc: Michal Hocko Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ed39d1042015..f009b21705c16 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4093,7 +4093,8 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) /* Zero out spare memory. */ if (want_init_on_alloc(flags)) memset((void *)p + size, 0, old_size - size); - + kasan_poison_vmalloc(p + size, old_size - size); + kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL); return (void *)p; } From 4de22b2a6a7477d84d9a01eb6b62a9117309d722 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 25 Nov 2024 20:17:18 +0000 Subject: [PATCH 333/366] mm: open-code PageTail in folio_flags() and const_folio_flags() It is unsafe to call PageTail() in dump_page() as page_is_fake_head() will almost certainly return true when called on a head page that is copied to the stack. That will cause the VM_BUG_ON_PGFLAGS() in const_folio_flags() to trigger when it shouldn't. Fortunately, we don't need to call PageTail() here; it's fine to have a pointer to a virtual alias of the page's flag word rather than the real page's flag word. Link: https://lkml.kernel.org/r/20241125201721.2963278-1-willy@infradead.org Fixes: fae7d834c43c ("mm: add __dump_folio()") Signed-off-by: Matthew Wilcox (Oracle) Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2220bfec278eb..cf46ac7208029 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -306,7 +306,7 @@ static const unsigned long *const_folio_flags(const struct folio *folio, { const struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } @@ -315,7 +315,7 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } From 6a7de1bf218d75f27f68d6a3f5ae1eb7332b941e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 25 Nov 2024 20:17:19 +0000 Subject: [PATCH 334/366] mm: open-code page_folio() in dump_page() page_folio() calls page_fixed_fake_head() which will misidentify this page as being a fake head and load off the end of 'precise'. We may have a pointer to a fake head, but that's OK because it contains the right information for dump_page(). gcc-15 is smart enough to catch this with -Warray-bounds: In function 'page_fixed_fake_head', inlined from '_compound_head' at ../include/linux/page-flags.h:251:24, inlined from '__dump_page' at ../mm/debug.c:123:11: ../include/asm-generic/rwonce.h:44:26: warning: array subscript 9 is outside +array bounds of 'struct page[1]' [-Warray-bounds=] Link: https://lkml.kernel.org/r/20241125201721.2963278-2-willy@infradead.org Fixes: fae7d834c43c ("mm: add __dump_folio()") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Kees Cook Cc: Signed-off-by: Andrew Morton --- mm/debug.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index aa57d3ffd4edf..95b6ab809c0ee 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -124,19 +124,22 @@ static void __dump_page(const struct page *page) { struct folio *foliop, folio; struct page precise; + unsigned long head; unsigned long pfn = page_to_pfn(page); unsigned long idx, nr_pages = 1; int loops = 5; again: memcpy(&precise, page, sizeof(*page)); - foliop = page_folio(&precise); - if (foliop == (struct folio *)&precise) { + head = precise.compound_head; + if ((head & 1) == 0) { + foliop = (struct folio *)&precise; idx = 0; if (!folio_test_large(foliop)) goto dump; foliop = (struct folio *)page; } else { + foliop = (struct folio *)(head - 1); idx = folio_page_idx(foliop, page); } From 031e04bdc834cda3b054ef6b698503b2b97e8186 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 22 Nov 2024 16:39:47 +0100 Subject: [PATCH 335/366] stackdepot: fix stack_depot_save_flags() in NMI context Per documentation, stack_depot_save_flags() was meant to be usable from NMI context if STACK_DEPOT_FLAG_CAN_ALLOC is unset. However, it still would try to take the pool_lock in an attempt to save a stack trace in the current pool (if space is available). This could result in deadlock if an NMI is handled while pool_lock is already held. To avoid deadlock, only try to take the lock in NMI context and give up if unsuccessful. The documentation is fixed to clearly convey this. Link: https://lkml.kernel.org/r/Z0CcyfbPqmxJ9uJH@elver.google.com Link: https://lkml.kernel.org/r/20241122154051.3914732-1-elver@google.com Fixes: 4434a56ec209 ("stackdepot: make fast paths lock-less again") Signed-off-by: Marco Elver Reported-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Oscar Salvador Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 6 +++--- lib/stackdepot.c | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index e9ec32fb97d4a..2cc21ffcdaf9e 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -147,7 +147,7 @@ static inline int stack_depot_early_init(void) { return 0; } * If the provided stack trace comes from the interrupt context, only the part * up to the interrupt entry is saved. * - * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if + * Context: Any context, but unsetting STACK_DEPOT_FLAG_CAN_ALLOC is required if * alloc_pages() cannot be used from the current context. Currently * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). @@ -156,7 +156,7 @@ static inline int stack_depot_early_init(void) { return 0; } */ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, unsigned int nr_entries, - gfp_t gfp_flags, + gfp_t alloc_flags, depot_flags_t depot_flags); /** @@ -175,7 +175,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, * Return: Handle of the stack trace stored in depot, 0 on failure */ depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int nr_entries, gfp_t gfp_flags); + unsigned int nr_entries, gfp_t alloc_flags); /** * __stack_depot_get_stack_record - Get a pointer to a stack_record struct diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 5ed34cc963fc3..245d5b4166999 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -630,7 +630,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - raw_spin_lock_irqsave(&pool_lock, flags); + if (in_nmi()) { + /* We can never allocate in NMI context. */ + WARN_ON_ONCE(can_alloc); + /* Best effort; bail if we fail to take the lock. */ + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + goto exit; + } else { + raw_spin_lock_irqsave(&pool_lock, flags); + } printk_deferred_enter(); /* Try to find again, to avoid concurrently inserting duplicates. */ From 914eec5e980171bc128e7e24f7a22aa1d803570e Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Tue, 19 Nov 2024 09:45:00 -0800 Subject: [PATCH 336/366] ocfs2: update seq_file index in ocfs2_dlm_seq_next The following INFO level message was seen: seq_file: buggy .next function ocfs2_dlm_seq_next [ocfs2] did not update position index Fix: Update *pos (so m->index) to make seq_read_iter happy though the index its self makes no sense to ocfs2_dlm_seq_next. Link: https://lkml.kernel.org/r/20241119174500.9198-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/dlmglue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 60df52e4c1f87..764ecbd5ad41d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3110,6 +3110,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) struct ocfs2_lock_res *iter = v; struct ocfs2_lock_res *dummy = &priv->p_iter_res; + (*pos)++; spin_lock(&ocfs2_dlm_tracking_lock); iter = ocfs2_dlm_next_res(iter, priv); list_del_init(&dummy->l_debug_list); From 51f43d5d82ed2ba3f9a3f9a2390c52f28e42af32 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Fri, 29 Nov 2024 10:52:13 +0800 Subject: [PATCH 337/366] mm/codetag: swap tags when migrate pages Current solution to adjust codetag references during page migration is done in 3 steps: 1. sets the codetag reference of the old page as empty (not pointing to any codetag); 2. subtracts counters of the new page to compensate for its own allocation; 3. sets codetag reference of the new page to point to the codetag of the old page. This does not work if CONFIG_MEM_ALLOC_PROFILING_DEBUG=n because set_codetag_empty() becomes NOOP. Instead, let's simply swap codetag references so that the new page is referencing the old codetag and the old page is referencing the new codetag. This way accounting stays valid and the logic makes more sense. Link: https://lkml.kernel.org/r/20241129025213.34836-1-00107082@163.com Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()") Signed-off-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/lkml/20241124074318.399027-1-00107082@163.com/ Acked-by: Suren Baghdasaryan Suggested-by: Suren Baghdasaryan Acked-by: Yu Zhao Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 4 ++-- lib/alloc_tag.c | 36 ++++++++++++++++++++++-------------- mm/migrate.c | 2 +- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 0e43ab653ab62..3469c4b201053 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -231,7 +231,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) } void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); -void pgalloc_tag_copy(struct folio *new, struct folio *old); +void pgalloc_tag_swap(struct folio *new, struct folio *old); void __init alloc_tag_sec_init(void); @@ -245,7 +245,7 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} -static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) {} +static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 2414a7ee7ec77..35f7560a309a4 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -189,26 +189,34 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) } } -void pgalloc_tag_copy(struct folio *new, struct folio *old) +void pgalloc_tag_swap(struct folio *new, struct folio *old) { - union pgtag_ref_handle handle; - union codetag_ref ref; - struct alloc_tag *tag; + union pgtag_ref_handle handle_old, handle_new; + union codetag_ref ref_old, ref_new; + struct alloc_tag *tag_old, *tag_new; - tag = pgalloc_tag_get(&old->page); - if (!tag) + tag_old = pgalloc_tag_get(&old->page); + if (!tag_old) + return; + tag_new = pgalloc_tag_get(&new->page); + if (!tag_new) return; - if (!get_page_tag_ref(&new->page, &ref, &handle)) + if (!get_page_tag_ref(&old->page, &ref_old, &handle_old)) return; + if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) { + put_page_tag_ref(handle_old); + return; + } + + /* swap tags */ + __alloc_tag_ref_set(&ref_old, tag_new); + update_page_tag_ref(handle_old, &ref_old); + __alloc_tag_ref_set(&ref_new, tag_old); + update_page_tag_ref(handle_new, &ref_new); - /* Clear the old ref to the original allocation tag. */ - clear_page_tag_ref(&old->page); - /* Decrement the counters of the tag on get_new_folio. */ - alloc_tag_sub(&ref, folio_size(new)); - __alloc_tag_ref_set(&ref, tag); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); + put_page_tag_ref(handle_old); + put_page_tag_ref(handle_new); } static void shutdown_mem_profiling(bool remove_file) diff --git a/mm/migrate.c b/mm/migrate.c index 2ce6b4b814df0..cc68583c86f96 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -745,7 +745,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio) folio_set_readahead(newfolio); folio_copy_owner(newfolio, folio); - pgalloc_tag_copy(newfolio, folio); + pgalloc_tag_swap(newfolio, folio); mem_cgroup_migrate(folio, newfolio); } From 89dd878282881306c38f7e354e7614fca98cb9a6 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Thu, 28 Nov 2024 12:39:59 -0800 Subject: [PATCH 338/366] mm: memcg: declare do_memsw_account inline In commit 66d60c428b23 ("mm: memcg: move legacy memcg event code into memcontrol-v1.c"), the static do_memsw_account() function was moved from a .c file to a .h file. Unfortunately, the traditional inline keyword wasn't added. If a file (e.g., a unit test) includes the .h file, but doesn't refer to do_memsw_account(), it will get a warning like: mm/memcontrol-v1.h:41:13: warning: unused function 'do_memsw_account' [-Wunused-function] 41 | static bool do_memsw_account(void) | ^~~~~~~~~~~~~~~~ Link: https://lkml.kernel.org/r/20241128203959.726527-1-jsperbeck@google.com Fixes: 66d60c428b23 ("mm: memcg: move legacy memcg event code into memcontrol-v1.c") Signed-off-by: John Sperbeck Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton --- mm/memcontrol-v1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index 0e3b82951d915..144d71b659073 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -38,7 +38,7 @@ void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n); iter = mem_cgroup_iter(NULL, iter, NULL)) /* Whether legacy memory+swap accounting is active */ -static bool do_memsw_account(void) +static inline bool do_memsw_account(void) { return !cgroup_subsys_on_dfl(memory_cgrp_subsys); } From 249608ee47132cab3b1adacd9e463548f57bd316 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 18 Nov 2024 13:46:48 -0800 Subject: [PATCH 339/366] mm: respect mmap hint address when aligning for THP Commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") updated __get_unmapped_area() to align the start address for the VMA to a PMD boundary if CONFIG_TRANSPARENT_HUGEPAGE=y. It does this by effectively looking up a region that is of size, request_size + PMD_SIZE, and aligning up the start to a PMD boundary. Commit 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on 32 bit") opted out of this for 32bit due to regressions in mmap base randomization. Commit d4148aeab412 ("mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes") restricted this to only mmap sizes that are multiples of the PMD_SIZE due to reported regressions in some performance benchmarks -- which seemed mostly due to the reduced spatial locality of related mappings due to the forced PMD-alignment. Another unintended side effect has emerged: When a user specifies an mmap hint address, the THP alignment logic modifies the behavior, potentially ignoring the hint even if a sufficiently large gap exists at the requested hint location. Example Scenario: Consider the following simplified virtual address (VA) space: ... 0x200000-0x400000 --- VMA A 0x400000-0x600000 --- Hole 0x600000-0x800000 --- VMA B ... A call to mmap() with hint=0x400000 and len=0x200000 behaves differently: - Before THP alignment: The requested region (size 0x200000) fits into the gap at 0x400000, so the hint is respected. - After alignment: The logic searches for a region of size 0x400000 (len + PMD_SIZE) starting at 0x400000. This search fails due to the mapping at 0x600000 (VMA B), and the hint is ignored, falling back to arch_get_unmapped_area[_topdown](). In general the hint is effectively ignored, if there is any existing mapping in the below range: [mmap_hint + mmap_size, mmap_hint + mmap_size + PMD_SIZE) This changes the semantics of mmap hint; from ""Respect the hint if a sufficiently large gap exists at the requested location" to "Respect the hint only if an additional PMD-sized gap exists beyond the requested size". This has performance implications for allocators that allocate their heap using mmap but try to keep it "as contiguous as possible" by using the end of the exisiting heap as the address hint. With the new behavior it's more likely to get a much less contiguous heap, adding extra fragmentation and performance overhead. To restore the expected behavior; don't use thp_get_unmapped_area_vmflags() when the user provided a hint address, for anonymous mappings. Note: As Yang Shi pointed out: the issue still remains for filesystems which are using thp_get_unmapped_area() for their get_unmapped_area() op. It is unclear what worklaods will regress for if we ignore THP alignment when the hint address is provided for such file backed mappings -- so this fix will be handled separately. Link: https://lkml.kernel.org/r/20241118214650.3667577-1-kaleshsingh@google.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Kalesh Singh Reviewed-by: Rik van Riel Reviewed-by: Vlastimil Babka Reviewed-by: David Hildenbrand Cc: Kefeng Wang Cc: Vlastimil Babka Cc: Yang Shi Cc: Rik van Riel Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Minchan Kim Cc: Hans Boehm Cc: Lokesh Gidra Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mmap.c b/mm/mmap.c index 386429f7db5a0..d32b7e701058a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -889,6 +889,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (get_area) { addr = get_area(file, addr, len, pgoff, flags); } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) + && !addr /* no hint */ && IS_ALIGNED(len, PMD_SIZE)) { /* Ensures that larger anonymous mappings are THP aligned. */ addr = thp_get_unmapped_area_vmflags(file, addr, len, From cbb70e45348769172cb24cca2d2b6437f4c9240b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 18 Nov 2024 17:54:14 +0000 Subject: [PATCH 340/366] mm: correct typo in MMAP_STATE() macro We mistakenly refer to len rather than len_ here. The only existing caller passes len to the len_ parameter so this has no impact on the code, but it is obviously incorrect to do this, so fix it. Link: https://lkml.kernel.org/r/20241118175414.390827-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Wei Yang Cc: Jann Horn Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vma.c b/mm/vma.c index 8a454a7bbc80b..8e31b7e25aeb1 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -35,7 +35,7 @@ struct mmap_state { .mm = mm_, \ .vmi = vmi_, \ .addr = addr_, \ - .end = (addr_) + len, \ + .end = (addr_) + (len_), \ .pgoff = pgoff_, \ .pglen = PHYS_PFN(len_), \ .flags = flags_, \ From d89c8ec0546184267cb211b579514ebaf8916100 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Nov 2024 18:24:06 -0800 Subject: [PATCH 341/366] scatterlist: fix incorrect func name in kernel-doc Fix a kernel-doc warning by making the kernel-doc function description match the function name: include/linux/scatterlist.h:323: warning: expecting prototype for sg_unmark_bus_address(). Prototype was for sg_dma_unmark_bus_address() instead Link: https://lkml.kernel.org/r/20241130022406.537973-1-rdunlap@infradead.org Fixes: 42399301203e ("lib/scatterlist: add flag for indicating P2PDMA segments in an SGL") Signed-off-by: Randy Dunlap Cc: Logan Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index c5e2239b550ef..d836e7440ee86 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -313,7 +313,7 @@ static inline void sg_dma_mark_bus_address(struct scatterlist *sg) } /** - * sg_unmark_bus_address - Unmark the scatterlist entry as a bus address + * sg_dma_unmark_bus_address - Unmark the scatterlist entry as a bus address * @sg: SG entry * * Description: From 3203b3ab0fcf22132caadd72caebfad47bf0dd2b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 29 Nov 2024 13:53:03 +0100 Subject: [PATCH 342/366] mm/filemap: don't call folio_test_locked() without a reference in next_uptodate_folio() The folio can get freed + buddy-merged + reallocated in the meantime, resulting in us calling folio_test_locked() possibly on a tail page. This makes const_folio_flags VM_BUG_ON_PGFLAGS() when stumbling over the tail page. Could this result in other issues? Doesn't look like it. False positives and false negatives don't really matter, because this folio would get skipped either way when detecting that they have been reallocated in the meantime. Fix it by performing the folio_test_locked() checked after grabbing a reference. If this ever becomes a real problem, we could add a special helper that racily checks if the bit is set even on tail pages ... but let's hope that's not required so we can just handle it cleaner: work on the folio after we hold a reference. Do we really need the folio_test_locked() check if we are going to trylock briefly after? Well, we can at least avoid a xas_reload(). It's a bit unclear which exact change introduced that issue. Likely, ever since we made PG_locked obey to the PF_NO_TAIL policy it could have been triggered in some way. Link: https://lkml.kernel.org/r/20241129125303.4033164-1-david@redhat.com Fixes: 48c935ad88f5 ("page-flags: define PG_locked behavior on compound pages") Signed-off-by: David Hildenbrand Reported-by: syzbot+9f9a7f73fb079b2387a6@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/674184c9.050a0220.1cc393.0001.GAE@google.com/ Acked-by: Kirill A. Shutemov Cc: "Matthew Wilcox (Oracle)" Cc: Hillf Danton Signed-off-by: Andrew Morton --- mm/filemap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 7c76a123ba18b..f61cf51c22389 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3501,10 +3501,10 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, continue; if (xa_is_value(folio)) continue; - if (folio_test_locked(folio)) - continue; if (!folio_try_get(folio)) continue; + if (folio_test_locked(folio)) + goto skip; /* Has the page moved or been split? */ if (unlikely(folio != xas_reload(xas))) goto skip; From 5c3793604f91123bf49bc792ce697a0bef4c173c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 17 Nov 2024 03:38:13 -0800 Subject: [PATCH 343/366] lib: stackinit: hide never-taken branch from compiler The never-taken branch leads to an invalid bounds condition, which is by design. To avoid the unwanted warning from the compiler, hide the variable from the optimizer. ../lib/stackinit_kunit.c: In function 'do_nothing_u16_zero': ../lib/stackinit_kunit.c:51:49: error: array subscript 1 is outside array bounds of 'u16[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds=] 51 | #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) | ^~~~~~ ../lib/stackinit_kunit.c:219:24: note: in expansion of macro 'DO_NOTHING_RETURN_SCALAR' 219 | return DO_NOTHING_RETURN_ ## which(ptr + 1); \ | ^~~~~~~~~~~~~~~~~~ Link: https://lkml.kernel.org/r/20241117113813.work.735-kees@kernel.org Signed-off-by: Kees Cook Cc: Signed-off-by: Andrew Morton --- lib/stackinit_kunit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index c14c6f8e6308d..c40818ec9c180 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -212,6 +212,7 @@ static noinline void test_ ## name (struct kunit *test) \ static noinline DO_NOTHING_TYPE_ ## which(var_type) \ do_nothing_ ## name(var_type *ptr) \ { \ + OPTIMIZER_HIDE_VAR(ptr); \ /* Will always be true, but compiler doesn't know. */ \ if ((unsigned long)ptr > 0x2) \ return DO_NOTHING_RETURN_ ## which(ptr); \ From 6535b8669c1a74078098517174e53fc907ce9d56 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 15 Nov 2024 10:20:23 -0800 Subject: [PATCH 344/366] mm/damon: fix order of arguments in damos_before_apply tracepoint Since the order of the scheme_idx and target_idx arguments in TP_ARGS is reversed, they are stored in the trace record in reverse. Link: https://lkml.kernel.org/r/20241115182023.43118-1-sj@kernel.org Link: https://patch.msgid.link/20241112154828.40307-1-akinobu.mita@gmail.com Fixes: c603c630b509 ("mm/damon/core: add a tracepoint for damos apply target regions") Signed-off-by: Akinobu Mita Signed-off-by: SeongJae Park Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 23200aabccacb..da4bd9fd11625 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -15,7 +15,7 @@ TRACE_EVENT_CONDITION(damos_before_apply, unsigned int target_idx, struct damon_region *r, unsigned int nr_regions, bool do_trace), - TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace), + TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_regions, do_trace), TP_CONDITION(do_trace), From 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Wed, 13 Nov 2024 18:21:46 +0800 Subject: [PATCH 345/366] sched/numa: fix memory leak due to the overwritten vma->numab_state [Problem Description] When running the hackbench program of LTP, the following memory leak is reported by kmemleak. # /opt/ltp/testcases/bin/hackbench 20 thread 1000 Running with 20*40 (== 800) tasks. # dmesg | grep kmemleak ... kmemleak: 480 new suspected memory leaks (see /sys/kernel/debug/kmemleak) kmemleak: 665 new suspected memory leaks (see /sys/kernel/debug/kmemleak) # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff888cd8ca2c40 (size 64): comm "hackbench", pid 17142, jiffies 4299780315 hex dump (first 32 bytes): ac 74 49 00 01 00 00 00 4c 84 49 00 01 00 00 00 .tI.....L.I..... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc bff18fd4): [] __kmalloc_cache_noprof+0x2f9/0x3f0 [] task_numa_work+0x725/0xa00 [] task_work_run+0x58/0x90 [] syscall_exit_to_user_mode+0x1c8/0x1e0 [] do_syscall_64+0x85/0x150 [] entry_SYSCALL_64_after_hwframe+0x76/0x7e ... This issue can be consistently reproduced on three different servers: * a 448-core server * a 256-core server * a 192-core server [Root Cause] Since multiple threads are created by the hackbench program (along with the command argument 'thread'), a shared vma might be accessed by two or more cores simultaneously. When two or more cores observe that vma->numab_state is NULL at the same time, vma->numab_state will be overwritten. Although current code ensures that only one thread scans the VMAs in a single 'numa_scan_period', there might be a chance for another thread to enter in the next 'numa_scan_period' while we have not gotten till numab_state allocation [1]. Note that the command `/opt/ltp/testcases/bin/hackbench 50 process 1000` cannot the reproduce the issue. It is verified with 200+ test runs. [Solution] Use the cmpxchg atomic operation to ensure that only one thread executes the vma->numab_state assignment. [1] https://lore.kernel.org/lkml/1794be3c-358c-4cdc-a43d-a1f841d91ef7@amd.com/ Link: https://lkml.kernel.org/r/20241113102146.2384-1-ahuang12@lenovo.com Fixes: ef6a22b70f6d ("sched/numa: apply the scan delay to every new vma") Signed-off-by: Adrian Huang Reported-by: Jiwei Sun Reviewed-by: Raghavendra K T Reviewed-by: Vlastimil Babka Cc: Ben Segall Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Mel Gorman Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Cc: Signed-off-by: Andrew Morton --- kernel/sched/fair.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fbdca89c677f4..a59ae2e23dafa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3399,10 +3399,16 @@ static void task_numa_work(struct callback_head *work) /* Initialise new per-VMA NUMAB state. */ if (!vma->numab_state) { - vma->numab_state = kzalloc(sizeof(struct vma_numab_state), - GFP_KERNEL); - if (!vma->numab_state) + struct vma_numab_state *ptr; + + ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + if (!ptr) + continue; + + if (cmpxchg(&vma->numab_state, NULL, ptr)) { + kfree(ptr); continue; + } vma->numab_state->start_scan_seq = mm->numa_scan_seq; From f1ee5483e40881d8ad5a63aa148b753b5c6a839b Mon Sep 17 00:00:00 2001 From: Jakob Hauser Date: Fri, 29 Nov 2024 22:25:07 +0100 Subject: [PATCH 346/366] iio: magnetometer: yas530: use signed integer type for clamp limits In the function yas537_measure() there is a clamp_val() with limits of -BIT(13) and BIT(13) - 1. The input clamp value h[] is of type s32. The BIT() is of type unsigned long integer due to its define in include/vdso/bits.h. The lower limit -BIT(13) is recognized as -8192 but expressed as an unsigned long integer. The size of an unsigned long integer differs between 32-bit and 64-bit architectures. Converting this to type s32 may lead to undesired behavior. Additionally, in the calculation lines h[0], h[1] and h[2] the unsigned long integer divisor BIT(13) causes an unsigned division, shifting the left-hand side of the equation back and forth, possibly ending up in large positive values instead of negative values on 32-bit architectures. To solve those two issues, declare a signed integer with a value of BIT(13). There is another omission in the clamp line: clamp_val() returns a value and it's going nowhere here. Self-assign it to h[i] to make use of the clamp macro. Finally, replace clamp_val() macro by clamp() because after changing the limits from type unsigned long integer to signed integer it's fine that way. Link: https://lkml.kernel.org/r/11609b2243c295d65ab4d47e78c239d61ad6be75.1732914810.git.jahau@rocketmail.com Fixes: 65f79b501030 ("iio: magnetometer: yas530: Add YAS537 variant") Signed-off-by: Jakob Hauser Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411230458.dhZwh3TT-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202411282222.oF0B4110-lkp@intel.com/ Reviewed-by: David Laight Acked-by: Jonathan Cameron Cc: Lars-Peter Clausen Cc: Linus Walleij Signed-off-by: Andrew Morton --- drivers/iio/magnetometer/yamaha-yas530.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c index 65011a8598d33..c55a38650c0d4 100644 --- a/drivers/iio/magnetometer/yamaha-yas530.c +++ b/drivers/iio/magnetometer/yamaha-yas530.c @@ -372,6 +372,7 @@ static int yas537_measure(struct yas5xx *yas5xx, u16 *t, u16 *x, u16 *y1, u16 *y u8 data[8]; u16 xy1y2[3]; s32 h[3], s[3]; + int half_range = BIT(13); int i, ret; mutex_lock(&yas5xx->lock); @@ -406,13 +407,13 @@ static int yas537_measure(struct yas5xx *yas5xx, u16 *t, u16 *x, u16 *y1, u16 *y /* The second version of YAS537 needs to include calibration coefficients */ if (yas5xx->version == YAS537_VERSION_1) { for (i = 0; i < 3; i++) - s[i] = xy1y2[i] - BIT(13); - h[0] = (c->k * (128 * s[0] + c->a2 * s[1] + c->a3 * s[2])) / BIT(13); - h[1] = (c->k * (c->a4 * s[0] + c->a5 * s[1] + c->a6 * s[2])) / BIT(13); - h[2] = (c->k * (c->a7 * s[0] + c->a8 * s[1] + c->a9 * s[2])) / BIT(13); + s[i] = xy1y2[i] - half_range; + h[0] = (c->k * (128 * s[0] + c->a2 * s[1] + c->a3 * s[2])) / half_range; + h[1] = (c->k * (c->a4 * s[0] + c->a5 * s[1] + c->a6 * s[2])) / half_range; + h[2] = (c->k * (c->a7 * s[0] + c->a8 * s[1] + c->a9 * s[2])) / half_range; for (i = 0; i < 3; i++) { - clamp_val(h[i], -BIT(13), BIT(13) - 1); - xy1y2[i] = h[i] + BIT(13); + h[i] = clamp(h[i], -half_range, half_range - 1); + xy1y2[i] = h[i] + half_range; } } From 07fa619f2a40c221ea27747a3323cabc59ab25eb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Dec 2024 15:05:07 +0000 Subject: [PATCH 347/366] x86/kexec: Restore GDT on return from ::preserve_context kexec The restore_processor_state() function explicitly states that "the asm code that gets us here will have restored a usable GDT". That wasn't true in the case of returning from a ::preserve_context kexec. Make it so. Without this, the kernel was depending on the called function to reload a GDT which is appropriate for the kernel before returning. Test program: #include #include #include #include #include #include #include #include int main (void) { struct kexec_segment segment = {}; unsigned char purgatory[] = { 0x66, 0xba, 0xf8, 0x03, // mov $0x3f8, %dx 0xb0, 0x42, // mov $0x42, %al 0xee, // outb %al, (%dx) 0xc3, // ret }; int ret; segment.buf = &purgatory; segment.bufsz = sizeof(purgatory); segment.mem = (void *)0x400000; segment.memsz = 0x1000; ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT); if (ret) { perror("kexec_load"); exit(1); } ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); if (ret) { perror("kexec reboot"); exit(1); } printf("Success\n"); return 0; } Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241205153343.3275139-2-dwmw2@infradead.org --- arch/x86/kernel/relocate_kernel_64.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index e9e88c342f752..1236f25fc8d12 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) movq CR0(%r8), %r8 movq %rax, %cr3 movq %r8, %cr0 + +#ifdef CONFIG_KEXEC_JUMP + /* Saved in save_processor_state. */ + movq $saved_context, %rax + lgdt saved_context_gdt_desc(%rax) +#endif + movq %rbp, %rax popf From b3fce429a1e030b50c1c91351d69b8667eef627b Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Wed, 27 Nov 2024 16:22:46 -0800 Subject: [PATCH 348/366] cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU Commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU") adds functionality that architectures can use to optionally allocate and build cacheinfo early during boot. Commit 6539cffa9495 ("cacheinfo: Add arch specific early level initializer") lets secondary CPUs correct (and reallocate memory) cacheinfo data if needed. If the early build functionality is not used and cacheinfo does not need correction, memory for cacheinfo is never allocated. x86 does not use the early build functionality. Consequently, during the cacheinfo CPU hotplug callback, last_level_cache_is_valid() attempts to dereference a NULL pointer: BUG: kernel NULL pointer dereference, address: 0000000000000100 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not present page PGD 0 P4D 0 Oops: 0000 [#1] PREEPMT SMP NOPTI CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1 RIP: 0010: last_level_cache_is_valid+0x95/0xe0a Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback if not done earlier. Moreover, before determining the validity of the last-level cache info, ensure that it has been allocated. Simply checking for non-zero cache_leaves() is not sufficient, as some architectures (e.g., Intel processors) have non-zero cache_leaves() before allocation. Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size(). This function iterates over all online CPUs. However, a CPU may have come online recently, but its cacheinfo may not have been allocated yet. While here, remove an unnecessary indentation in allocate_cache_info(). [ bp: Massage. ] Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer") Signed-off-by: Ricardo Neri Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Radu Rendec Reviewed-by: Nikolay Borisov Reviewed-by: Andreas Herrmann Reviewed-by: Sudeep Holla Cc: stable@vger.kernel.org # 6.3+ Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com --- drivers/base/cacheinfo.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 609935ad50915..cf0d455209d7f 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned int cpu) { struct cacheinfo *llc; - if (!cache_leaves(cpu)) + if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu)) return false; llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); @@ -458,11 +458,9 @@ int __weak populate_cache_leaves(unsigned int cpu) return -ENOENT; } -static inline -int allocate_cache_info(int cpu) +static inline int allocate_cache_info(int cpu) { - per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), - sizeof(struct cacheinfo), GFP_ATOMIC); + per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC); if (!per_cpu_cacheinfo(cpu)) { cache_leaves(cpu) = 0; return -ENOMEM; @@ -534,7 +532,11 @@ static inline int init_level_allocate_ci(unsigned int cpu) */ ci_cacheinfo(cpu)->early_ci_levels = false; - if (cache_leaves(cpu) <= early_leaves) + /* + * Some architectures (e.g., x86) do not use early initialization. + * Allocate memory now in such case. + */ + if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu)) return 0; kfree(per_cpu_cacheinfo(cpu)); From 9677be09e5e4fbe48aeccb06ae3063c5eba331c3 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Wed, 27 Nov 2024 16:22:47 -0800 Subject: [PATCH 349/366] x86/cacheinfo: Delete global num_cache_leaves Linux remembers cpu_cachinfo::num_leaves per CPU, but x86 initializes all CPUs from the same global "num_cache_leaves". This is erroneous on systems such as Meteor Lake, where each CPU has a distinct num_leaves value. Delete the global "num_cache_leaves" and initialize num_leaves on each CPU. init_cache_level() no longer needs to set num_leaves. Also, it never had to set num_levels as it is unnecessary in x86. Keep checking for zero cache leaves. Such condition indicates a bug. [ bp: Cleanup. ] Signed-off-by: Ricardo Neri Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org # 6.3+ Link: https://lore.kernel.org/r/20241128002247.26726-3-ricardo.neri-calderon@linux.intel.com --- arch/x86/kernel/cpu/cacheinfo.c | 43 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 392d09c936d60..e6fa03ed9172c 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -178,8 +178,6 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -static unsigned short num_cache_leaves; - /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: L2 not shared, no SMT etc. that is currently true on AMD CPUs. @@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) void init_amd_cacheinfo(struct cpuinfo_x86 *c) { + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - num_cache_leaves = find_num_cache_leaves(c); + ci->num_leaves = find_num_cache_leaves(c); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) - num_cache_leaves = 4; + ci->num_leaves = 4; else - num_cache_leaves = 3; + ci->num_leaves = 3; } } void init_hygon_cacheinfo(struct cpuinfo_x86 *c) { - num_cache_leaves = find_num_cache_leaves(c); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + + ci->num_leaves = find_num_cache_leaves(c); } void init_intel_cacheinfo(struct cpuinfo_x86 *c) @@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(c); - is_initialized++; - } + /* + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been initialized. + */ + if (!ci->num_leaves) + ci->num_leaves = find_num_cache_leaves(c); /* * Whenever possible use cpuid(4), deterministic cache * parameters cpuid leaf to find the cache details */ - for (i = 0; i < num_cache_leaves; i++) { + for (i = 0; i < ci->num_leaves; i++) { struct _cpuid4_info_regs this_leaf = {}; int retval; @@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for * trace cache */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int j, n; unsigned int regs[4]; unsigned char *dp = (unsigned char *)regs; int only_trace = 0; - if (num_cache_leaves != 0 && c->x86 == 15) + if (ci->num_leaves && c->x86 == 15) only_trace = 1; /* Number of times to iterate */ @@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); - if (!num_cache_leaves) + /* There should be at least one leaf. */ + if (!ci->num_leaves) return -ENOENT; - if (!this_cpu_ci) - return -EINVAL; - this_cpu_ci->num_levels = 3; - this_cpu_ci->num_leaves = num_cache_leaves; + return 0; } From ddca5023091588eb303e3c0097d95c325992d05f Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 4 Dec 2024 17:46:00 -0600 Subject: [PATCH 350/366] smb3.1.1: fix posix mounts to older servers Some servers which implement the SMB3.1.1 POSIX extensions did not set the file type in the mode in the infolevel 100 response. With the recent changes for checking the file type via the mode field, this can cause the root directory to be reported incorrectly and mounts (e.g. to ksmbd) to fail. Fixes: 6a832bc8bbb2 ("fs/smb/client: Implement new SMB3 POSIX type") Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Cc: Ralph Boehme Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 2 +- fs/smb/client/inode.c | 11 ++++++++--- fs/smb/client/readdir.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 90e3297ea8479..754417cb3294e 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -669,7 +669,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, int cifs_sfu_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); -umode_t wire_mode_to_posix(u32 wire); +umode_t wire_mode_to_posix(u32 wire, bool is_dir); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index ef913d65c67f2..668098d3b108e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -814,13 +814,17 @@ static u32 wire_filetype_to_posix(u32 wire_type) return posix_filetypes[wire_type]; } -umode_t wire_mode_to_posix(u32 wire) +umode_t wire_mode_to_posix(u32 wire, bool is_dir) { u32 wire_type; u32 mode; wire_type = (wire & POSIX_FILETYPE_MASK) >> POSIX_FILETYPE_SHIFT; - mode = (wire_perms_to_posix(wire) | wire_filetype_to_posix(wire_type)); + /* older servers do not set POSIX file type in the mode field in the response */ + if ((wire_type == 0) && is_dir) + mode = wire_perms_to_posix(wire) | S_IFDIR; + else + mode = (wire_perms_to_posix(wire) | wire_filetype_to_posix(wire_type)); return (umode_t)mode; } @@ -860,7 +864,8 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_bytes = le64_to_cpu(info->AllocationSize); fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->HardLinks); - fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode)); + fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), + fattr->cf_cifsattrs & ATTR_DIRECTORY); if (cifs_open_data_reparse(data) && cifs_reparse_point_to_fattr(cifs_sb, fattr, data)) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index b906bf78af1ec..273358d20a46c 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -261,7 +261,8 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, fattr->cf_cifstag = le32_to_cpu(info->ReparseTag); /* The Mode field in the response can now include the file type as well */ - fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode)); + fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), + fattr->cf_cifsattrs & ATTR_DIRECTORY); fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); switch (fattr->cf_mode & S_IFMT) { From c32b624fa4f7ca5a2ff217a0b1b2f1352bb4ec11 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Dec 2024 11:49:07 -0300 Subject: [PATCH 351/366] smb: client: fix potential race in cifs_put_tcon() dfs_cache_refresh() delayed worker could race with cifs_put_tcon(), so make sure to call list_replace_init() on @tcon->dfs_ses_list after kworker is cancelled or finished. Fixes: 4f42a8b54b5c ("smb: client: fix DFS interlink failover") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/connect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 56b3a9eb9b055..2372538a12118 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2532,9 +2532,6 @@ cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) list_del_init(&tcon->tcon_list); tcon->status = TID_EXITING; -#ifdef CONFIG_CIFS_DFS_UPCALL - list_replace_init(&tcon->dfs_ses_list, &ses_list); -#endif spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); @@ -2542,6 +2539,7 @@ cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) cancel_delayed_work_sync(&tcon->query_interfaces); #ifdef CONFIG_CIFS_DFS_UPCALL cancel_delayed_work_sync(&tcon->dfs_cache_work); + list_replace_init(&tcon->dfs_ses_list, &ses_list); #endif if (tcon->use_witness) { From 4bf485a7db5d82ddd0f3ad2b299893199090375e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 6 Dec 2024 19:16:06 +0800 Subject: [PATCH 352/366] blk-mq: register cpuhp callback after hctx is added to xarray table We need to retrieve 'hctx' from xarray table in the cpuhp callback, so the callback should be registered after this 'hctx' is added to xarray table. Cc: Reinette Chatre Cc: Fenghua Yu Cc: Peter Newman Cc: Babu Moger Cc: Luck Tony Signed-off-by: Ming Lei Tested-by: Tony Luck Link: https://lore.kernel.org/r/20241206111611.978870-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 424239c075e28..a404465036de3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3824,16 +3824,11 @@ static int blk_mq_init_hctx(struct request_queue *q, { hctx->queue_num = hctx_idx; - if (!(hctx->flags & BLK_MQ_F_STACKING)) - cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, - &hctx->cpuhp_online); - cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); - hctx->tags = set->tags[hctx_idx]; if (set->ops->init_hctx && set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) - goto unregister_cpu_notifier; + goto fail; if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, hctx->numa_node)) @@ -3842,6 +3837,11 @@ static int blk_mq_init_hctx(struct request_queue *q, if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) goto exit_flush_rq; + if (!(hctx->flags & BLK_MQ_F_STACKING)) + cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, + &hctx->cpuhp_online); + cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); + return 0; exit_flush_rq: @@ -3850,8 +3850,7 @@ static int blk_mq_init_hctx(struct request_queue *q, exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); - unregister_cpu_notifier: - blk_mq_remove_cpuhp(hctx); + fail: return -1; } From 22465bbac53c821319089016f268a2437de9b00a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 6 Dec 2024 19:16:07 +0800 Subject: [PATCH 353/366] blk-mq: move cpuhp callback registering out of q->sysfs_lock Registering and unregistering cpuhp callback requires global cpu hotplug lock, which is used everywhere. Meantime q->sysfs_lock is used in block layer almost everywhere. It is easy to trigger lockdep warning[1] by connecting the two locks. Fix the warning by moving blk-mq's cpuhp callback registering out of q->sysfs_lock. Add one dedicated global lock for covering registering & unregistering hctx's cpuhp, and it is safe to do so because hctx is guaranteed to be live if our request_queue is live. [1] https://lore.kernel.org/lkml/Z04pz3AlvI4o0Mr8@agluck-desk3/ Cc: Reinette Chatre Cc: Fenghua Yu Cc: Peter Newman Cc: Babu Moger Reported-by: Luck Tony Signed-off-by: Ming Lei Tested-by: Tony Luck Link: https://lore.kernel.org/r/20241206111611.978870-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 103 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 11 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a404465036de3..aa340b097b6e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -43,6 +43,7 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); +static DEFINE_MUTEX(blk_mq_cpuhp_lock); static void blk_mq_insert_request(struct request *rq, blk_insert_t flags); static void blk_mq_request_bypass_insert(struct request *rq, @@ -3739,13 +3740,91 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) return 0; } -static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) +static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_STACKING)) + lockdep_assert_held(&blk_mq_cpuhp_lock); + + if (!(hctx->flags & BLK_MQ_F_STACKING) && + !hlist_unhashed(&hctx->cpuhp_online)) { cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, &hctx->cpuhp_online); - cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, - &hctx->cpuhp_dead); + INIT_HLIST_NODE(&hctx->cpuhp_online); + } + + if (!hlist_unhashed(&hctx->cpuhp_dead)) { + cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, + &hctx->cpuhp_dead); + INIT_HLIST_NODE(&hctx->cpuhp_dead); + } +} + +static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) +{ + mutex_lock(&blk_mq_cpuhp_lock); + __blk_mq_remove_cpuhp(hctx); + mutex_unlock(&blk_mq_cpuhp_lock); +} + +static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx) +{ + lockdep_assert_held(&blk_mq_cpuhp_lock); + + if (!(hctx->flags & BLK_MQ_F_STACKING) && + hlist_unhashed(&hctx->cpuhp_online)) + cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, + &hctx->cpuhp_online); + + if (hlist_unhashed(&hctx->cpuhp_dead)) + cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, + &hctx->cpuhp_dead); +} + +static void __blk_mq_remove_cpuhp_list(struct list_head *head) +{ + struct blk_mq_hw_ctx *hctx; + + lockdep_assert_held(&blk_mq_cpuhp_lock); + + list_for_each_entry(hctx, head, hctx_list) + __blk_mq_remove_cpuhp(hctx); +} + +/* + * Unregister cpuhp callbacks from exited hw queues + * + * Safe to call if this `request_queue` is live + */ +static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q) +{ + LIST_HEAD(hctx_list); + + spin_lock(&q->unused_hctx_lock); + list_splice_init(&q->unused_hctx_list, &hctx_list); + spin_unlock(&q->unused_hctx_lock); + + mutex_lock(&blk_mq_cpuhp_lock); + __blk_mq_remove_cpuhp_list(&hctx_list); + mutex_unlock(&blk_mq_cpuhp_lock); + + spin_lock(&q->unused_hctx_lock); + list_splice(&hctx_list, &q->unused_hctx_list); + spin_unlock(&q->unused_hctx_lock); +} + +/* + * Register cpuhp callbacks from all hw queues + * + * Safe to call if this `request_queue` is live + */ +static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + mutex_lock(&blk_mq_cpuhp_lock); + queue_for_each_hw_ctx(q, hctx, i) + __blk_mq_add_cpuhp(hctx); + mutex_unlock(&blk_mq_cpuhp_lock); } /* @@ -3796,8 +3875,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); - blk_mq_remove_cpuhp(hctx); - xa_erase(&q->hctx_table, hctx_idx); spin_lock(&q->unused_hctx_lock); @@ -3814,6 +3891,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + blk_mq_remove_cpuhp(hctx); blk_mq_exit_hctx(q, set, hctx, i); } } @@ -3837,11 +3915,6 @@ static int blk_mq_init_hctx(struct request_queue *q, if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) goto exit_flush_rq; - if (!(hctx->flags & BLK_MQ_F_STACKING)) - cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, - &hctx->cpuhp_online); - cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); - return 0; exit_flush_rq: @@ -3876,6 +3949,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); + INIT_HLIST_NODE(&hctx->cpuhp_dead); + INIT_HLIST_NODE(&hctx->cpuhp_online); hctx->queue = q; hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; @@ -4414,6 +4489,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); mutex_unlock(&q->sysfs_lock); + + /* unregister cpuhp callbacks for exited hctxs */ + blk_mq_remove_hw_queues_cpuhp(q); + + /* register cpuhp for new initialized hctxs */ + blk_mq_add_hw_queues_cpuhp(q); } int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, From 156c977c539e87e173f505b23989d7b0ec0bc7d8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:14 +0800 Subject: [PATCH 354/366] bpf: Remove unnecessary check when updating LPM trie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When "node->prefixlen == matchlen" is true, it means that the node is fully matched. If "node->prefixlen == key->prefixlen" is false, it means the prefix length of key is greater than the prefix length of node, otherwise, matchlen will not be equal with node->prefixlen. However, it also implies that the prefix length of node must be less than max_prefixlen. Therefore, "node->prefixlen == trie->max_prefixlen" will always be false when the check of "node->prefixlen == key->prefixlen" returns false. Remove this unnecessary comparison. Reviewed-by: Toke Høiland-Jørgensen Acked-by: Daniel Borkmann Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 9b60eda0f727b..73fd593d37451 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -364,8 +364,7 @@ static long trie_update_elem(struct bpf_map *map, matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || - node->prefixlen == key->prefixlen || - node->prefixlen == trie->max_prefixlen) + node->prefixlen == key->prefixlen) break; next_bit = extract_bit(key->data, node->prefixlen); From 3d5611b4d7efbefb85a74fcdbc35c603847cc022 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:15 +0800 Subject: [PATCH 355/366] bpf: Remove unnecessary kfree(im_node) in lpm_trie_update_elem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to call kfree(im_node) when updating element fails, because im_node must be NULL. Remove the unnecessary kfree() for im_node. Reviewed-by: Toke Høiland-Jørgensen Acked-by: Daniel Borkmann Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 73fd593d37451..b5e281a55760b 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -315,7 +315,7 @@ static long trie_update_elem(struct bpf_map *map, void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; + struct lpm_trie_node *node, *im_node, *new_node = NULL; struct lpm_trie_node *free_node = NULL; struct lpm_trie_node __rcu **slot; struct bpf_lpm_trie_key_u8 *key = _key; @@ -431,9 +431,7 @@ static long trie_update_elem(struct bpf_map *map, if (ret) { if (new_node) trie->n_entries--; - kfree(new_node); - kfree(im_node); } spin_unlock_irqrestore(&trie->lock, irq_flags); From eae6a075e9537dd69891cf77ca5a88fa8a28b4a1 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:16 +0800 Subject: [PATCH 356/366] bpf: Handle BPF_EXIST and BPF_NOEXIST for LPM trie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the currently missing handling for the BPF_EXIST and BPF_NOEXIST flags. These flags can be specified by users and are relevant since LPM trie supports exact matches during update. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reviewed-by: Toke Høiland-Jørgensen Acked-by: Daniel Borkmann Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index b5e281a55760b..be5bf03895328 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -375,6 +375,10 @@ static long trie_update_elem(struct bpf_map *map, * simply assign the @new_node to that slot and be done. */ if (!node) { + if (flags == BPF_EXIST) { + ret = -ENOENT; + goto out; + } rcu_assign_pointer(*slot, new_node); goto out; } @@ -383,18 +387,31 @@ static long trie_update_elem(struct bpf_map *map, * which already has the correct data array set. */ if (node->prefixlen == matchlen) { + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) { + if (flags == BPF_NOEXIST) { + ret = -EEXIST; + goto out; + } + trie->n_entries--; + } else if (flags == BPF_EXIST) { + ret = -ENOENT; + goto out; + } + new_node->child[0] = node->child[0]; new_node->child[1] = node->child[1]; - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) - trie->n_entries--; - rcu_assign_pointer(*slot, new_node); free_node = node; goto out; } + if (flags == BPF_EXIST) { + ret = -ENOENT; + goto out; + } + /* If the new node matches the prefix completely, it must be inserted * as an ancestor. Simply insert it between @node and *@slot. */ From 532d6b36b2bfac5514426a97a4df8d103d700d43 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:17 +0800 Subject: [PATCH 357/366] bpf: Handle in-place update for full LPM trie correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a LPM trie is full, in-place updates of existing elements incorrectly return -ENOSPC. Fix this by deferring the check of trie->n_entries. For new insertions, n_entries must not exceed max_entries. However, in-place updates are allowed even when the trie is full. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 44 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index be5bf03895328..df6cc0a1c9bf7 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -310,6 +310,16 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, return node; } +static int trie_check_add_elem(struct lpm_trie *trie, u64 flags) +{ + if (flags == BPF_EXIST) + return -ENOENT; + if (trie->n_entries == trie->map.max_entries) + return -ENOSPC; + trie->n_entries++; + return 0; +} + /* Called from syscall or from eBPF program */ static long trie_update_elem(struct bpf_map *map, void *_key, void *value, u64 flags) @@ -333,20 +343,12 @@ static long trie_update_elem(struct bpf_map *map, spin_lock_irqsave(&trie->lock, irq_flags); /* Allocate and fill a new node */ - - if (trie->n_entries == trie->map.max_entries) { - ret = -ENOSPC; - goto out; - } - new_node = lpm_trie_node_alloc(trie, value); if (!new_node) { ret = -ENOMEM; goto out; } - trie->n_entries++; - new_node->prefixlen = key->prefixlen; RCU_INIT_POINTER(new_node->child[0], NULL); RCU_INIT_POINTER(new_node->child[1], NULL); @@ -375,10 +377,10 @@ static long trie_update_elem(struct bpf_map *map, * simply assign the @new_node to that slot and be done. */ if (!node) { - if (flags == BPF_EXIST) { - ret = -ENOENT; + ret = trie_check_add_elem(trie, flags); + if (ret) goto out; - } + rcu_assign_pointer(*slot, new_node); goto out; } @@ -392,10 +394,10 @@ static long trie_update_elem(struct bpf_map *map, ret = -EEXIST; goto out; } - trie->n_entries--; - } else if (flags == BPF_EXIST) { - ret = -ENOENT; - goto out; + } else { + ret = trie_check_add_elem(trie, flags); + if (ret) + goto out; } new_node->child[0] = node->child[0]; @@ -407,10 +409,9 @@ static long trie_update_elem(struct bpf_map *map, goto out; } - if (flags == BPF_EXIST) { - ret = -ENOENT; + ret = trie_check_add_elem(trie, flags); + if (ret) goto out; - } /* If the new node matches the prefix completely, it must be inserted * as an ancestor. Simply insert it between @node and *@slot. @@ -424,6 +425,7 @@ static long trie_update_elem(struct bpf_map *map, im_node = lpm_trie_node_alloc(trie, NULL); if (!im_node) { + trie->n_entries--; ret = -ENOMEM; goto out; } @@ -445,12 +447,8 @@ static long trie_update_elem(struct bpf_map *map, rcu_assign_pointer(*slot, im_node); out: - if (ret) { - if (new_node) - trie->n_entries--; + if (ret) kfree(new_node); - } - spin_unlock_irqrestore(&trie->lock, irq_flags); kfree_rcu(free_node, rcu); From 27abc7b3fa2e09bbe41e2924d328121546865eda Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:18 +0800 Subject: [PATCH 358/366] bpf: Fix exact match conditions in trie_get_next_key() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trie_get_next_key() uses node->prefixlen == key->prefixlen to identify an exact match, However, it is incorrect because when the target key doesn't fully match the found node (e.g., node->prefixlen != matchlen), these two nodes may also have the same prefixlen. It will return expected result when the passed key exist in the trie. However when a recently-deleted key or nonexistent key is passed to trie_get_next_key(), it may skip keys and return incorrect result. Fix it by using node->prefixlen == matchlen to identify exact matches. When the condition is true after the search, it also implies node->prefixlen equals key->prefixlen, otherwise, the search would return NULL instead. Fixes: b471f2f1de8b ("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE map") Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-6-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index df6cc0a1c9bf7..9ba6ae1452397 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -645,7 +645,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) struct lpm_trie_node **node_stack = NULL; int err = 0, stack_ptr = -1; unsigned int next_bit; - size_t matchlen; + size_t matchlen = 0; /* The get_next_key follows postorder. For the 4 node example in * the top of this file, the trie_get_next_key() returns the following @@ -684,7 +684,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) next_bit = extract_bit(key->data, node->prefixlen); node = rcu_dereference(node->child[next_bit]); } - if (!node || node->prefixlen != key->prefixlen || + if (!node || node->prefixlen != matchlen || (node->flags & LPM_TREE_NODE_FLAG_IM)) goto find_leftmost; From 3d8dc43eb2a3d179809f5fc27c88c93a57ea123d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:19 +0800 Subject: [PATCH 359/366] bpf: Switch to bpf mem allocator for LPM trie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple syzbot warnings have been reported. These warnings are mainly about the lock order between trie->lock and kmalloc()'s internal lock. See report [1] as an example: ====================================================== WARNING: possible circular locking dependency detected 6.10.0-rc7-syzkaller-00003-g4376e966ecb7 #0 Not tainted ------------------------------------------------------ syz.3.2069/15008 is trying to acquire lock: ffff88801544e6d8 (&n->list_lock){-.-.}-{2:2}, at: get_partial_node ... but task is already holding lock: ffff88802dcc89f8 (&trie->lock){-.-.}-{2:2}, at: trie_update_elem ... which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&trie->lock){-.-.}-{2:2}: __raw_spin_lock_irqsave _raw_spin_lock_irqsave+0x3a/0x60 trie_delete_elem+0xb0/0x820 ___bpf_prog_run+0x3e51/0xabd0 __bpf_prog_run32+0xc1/0x100 bpf_dispatcher_nop_func ...... bpf_trace_run2+0x231/0x590 __bpf_trace_contention_end+0xca/0x110 trace_contention_end.constprop.0+0xea/0x170 __pv_queued_spin_lock_slowpath+0x28e/0xcc0 pv_queued_spin_lock_slowpath queued_spin_lock_slowpath queued_spin_lock do_raw_spin_lock+0x210/0x2c0 __raw_spin_lock_irqsave _raw_spin_lock_irqsave+0x42/0x60 __put_partials+0xc3/0x170 qlink_free qlist_free_all+0x4e/0x140 kasan_quarantine_reduce+0x192/0x1e0 __kasan_slab_alloc+0x69/0x90 kasan_slab_alloc slab_post_alloc_hook slab_alloc_node kmem_cache_alloc_node_noprof+0x153/0x310 __alloc_skb+0x2b1/0x380 ...... -> #0 (&n->list_lock){-.-.}-{2:2}: check_prev_add check_prevs_add validate_chain __lock_acquire+0x2478/0x3b30 lock_acquire lock_acquire+0x1b1/0x560 __raw_spin_lock_irqsave _raw_spin_lock_irqsave+0x3a/0x60 get_partial_node.part.0+0x20/0x350 get_partial_node get_partial ___slab_alloc+0x65b/0x1870 __slab_alloc.constprop.0+0x56/0xb0 __slab_alloc_node slab_alloc_node __do_kmalloc_node __kmalloc_node_noprof+0x35c/0x440 kmalloc_node_noprof bpf_map_kmalloc_node+0x98/0x4a0 lpm_trie_node_alloc trie_update_elem+0x1ef/0xe00 bpf_map_update_value+0x2c1/0x6c0 map_update_elem+0x623/0x910 __sys_bpf+0x90c/0x49a0 ... other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&trie->lock); lock(&n->list_lock); lock(&trie->lock); lock(&n->list_lock); *** DEADLOCK *** [1]: https://syzkaller.appspot.com/bug?extid=9045c0a3d5a7f1b119f7 A bpf program attached to trace_contention_end() triggers after acquiring &n->list_lock. The program invokes trie_delete_elem(), which then acquires trie->lock. However, it is possible that another process is invoking trie_update_elem(). trie_update_elem() will acquire trie->lock first, then invoke kmalloc_node(). kmalloc_node() may invoke get_partial_node() and try to acquire &n->list_lock (not necessarily the same lock object). Therefore, lockdep warns about the circular locking dependency. Invoking kmalloc() before acquiring trie->lock could fix the warning. However, since BPF programs call be invoked from any context (e.g., through kprobe/tracepoint/fentry), there may still be lock ordering problems for internal locks in kmalloc() or trie->lock itself. To eliminate these potential lock ordering problems with kmalloc()'s internal locks, replacing kmalloc()/kfree()/kfree_rcu() with equivalent BPF memory allocator APIs that can be invoked in any context. The lock ordering problems with trie->lock (e.g., reentrance) will be handled separately. Three aspects of this change require explanation: 1. Intermediate and leaf nodes are allocated from the same allocator. Since the value size of LPM trie is usually small, using a single alocator reduces the memory overhead of the BPF memory allocator. 2. Leaf nodes are allocated before disabling IRQs. This handles cases where leaf_size is large (e.g., > 4KB - 8) and updates require intermediate node allocation. If leaf nodes were allocated in IRQ-disabled region, the free objects in BPF memory allocator would not be refilled timely and the intermediate node allocation may fail. 3. Paired migrate_{disable|enable}() calls for node alloc and free. The BPF memory allocator uses per-CPU struct internally, these paired calls are necessary to guarantee correctness. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-7-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 71 +++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 9ba6ae1452397..f850360e75ce6 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Intermediate node */ #define LPM_TREE_NODE_FLAG_IM BIT(0) @@ -22,7 +23,6 @@ struct lpm_trie_node; struct lpm_trie_node { - struct rcu_head rcu; struct lpm_trie_node __rcu *child[2]; u32 prefixlen; u32 flags; @@ -32,6 +32,7 @@ struct lpm_trie_node { struct lpm_trie { struct bpf_map map; struct lpm_trie_node __rcu *root; + struct bpf_mem_alloc ma; size_t n_entries; size_t max_prefixlen; size_t data_size; @@ -287,17 +288,18 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) return found->data + trie->data_size; } -static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, - const void *value) +static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie, + const void *value, + bool disable_migration) { struct lpm_trie_node *node; - size_t size = sizeof(struct lpm_trie_node) + trie->data_size; - if (value) - size += trie->map.value_size; + if (disable_migration) + migrate_disable(); + node = bpf_mem_cache_alloc(&trie->ma); + if (disable_migration) + migrate_enable(); - node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN, - trie->map.numa_node); if (!node) return NULL; @@ -325,7 +327,7 @@ static long trie_update_elem(struct bpf_map *map, void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct lpm_trie_node *node, *im_node, *new_node = NULL; + struct lpm_trie_node *node, *im_node, *new_node; struct lpm_trie_node *free_node = NULL; struct lpm_trie_node __rcu **slot; struct bpf_lpm_trie_key_u8 *key = _key; @@ -340,14 +342,14 @@ static long trie_update_elem(struct bpf_map *map, if (key->prefixlen > trie->max_prefixlen) return -EINVAL; - spin_lock_irqsave(&trie->lock, irq_flags); + /* Allocate and fill a new node. Need to disable migration before + * invoking bpf_mem_cache_alloc(). + */ + new_node = lpm_trie_node_alloc(trie, value, true); + if (!new_node) + return -ENOMEM; - /* Allocate and fill a new node */ - new_node = lpm_trie_node_alloc(trie, value); - if (!new_node) { - ret = -ENOMEM; - goto out; - } + spin_lock_irqsave(&trie->lock, irq_flags); new_node->prefixlen = key->prefixlen; RCU_INIT_POINTER(new_node->child[0], NULL); @@ -423,7 +425,8 @@ static long trie_update_elem(struct bpf_map *map, goto out; } - im_node = lpm_trie_node_alloc(trie, NULL); + /* migration is disabled within the locked scope */ + im_node = lpm_trie_node_alloc(trie, NULL, false); if (!im_node) { trie->n_entries--; ret = -ENOMEM; @@ -447,10 +450,13 @@ static long trie_update_elem(struct bpf_map *map, rcu_assign_pointer(*slot, im_node); out: - if (ret) - kfree(new_node); spin_unlock_irqrestore(&trie->lock, irq_flags); - kfree_rcu(free_node, rcu); + + migrate_disable(); + if (ret) + bpf_mem_cache_free(&trie->ma, new_node); + bpf_mem_cache_free_rcu(&trie->ma, free_node); + migrate_enable(); return ret; } @@ -548,8 +554,11 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) out: spin_unlock_irqrestore(&trie->lock, irq_flags); - kfree_rcu(free_parent, rcu); - kfree_rcu(free_node, rcu); + + migrate_disable(); + bpf_mem_cache_free_rcu(&trie->ma, free_parent); + bpf_mem_cache_free_rcu(&trie->ma, free_node); + migrate_enable(); return ret; } @@ -571,6 +580,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; + size_t leaf_size; + int err; /* check sanity of attributes */ if (attr->max_entries == 0 || @@ -595,7 +606,17 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) spin_lock_init(&trie->lock); + /* Allocate intermediate and leaf nodes from the same allocator */ + leaf_size = sizeof(struct lpm_trie_node) + trie->data_size + + trie->map.value_size; + err = bpf_mem_alloc_init(&trie->ma, leaf_size, false); + if (err) + goto free_out; return &trie->map; + +free_out: + bpf_map_area_free(trie); + return ERR_PTR(err); } static void trie_free(struct bpf_map *map) @@ -627,13 +648,17 @@ static void trie_free(struct bpf_map *map) continue; } - kfree(node); + /* No bpf program may access the map, so freeing the + * node without waiting for the extra RCU GP. + */ + bpf_mem_cache_raw_free(node); RCU_INIT_POINTER(*slot, NULL); break; } } out: + bpf_mem_alloc_destroy(&trie->ma); bpf_map_area_free(trie); } From 6a5c63d43c0216d64915baa0e0eacf2beb66b271 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:20 +0800 Subject: [PATCH 360/366] bpf: Use raw_spinlock_t for LPM trie After switching from kmalloc() to the bpf memory allocator, there will be no blocking operation during the update of LPM trie. Therefore, change trie->lock from spinlock_t to raw_spinlock_t to make LPM trie usable in atomic context, even on RT kernels. The max value of prefixlen is 2048. Therefore, update or deletion operations will find the target after at most 2048 comparisons. Constructing a test case which updates an element after 2048 comparisons under a 8 CPU VM, and the average time and the maximal time for such update operation is about 210us and 900us. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-8-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index f850360e75ce6..f8bc1e0961823 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -36,7 +36,7 @@ struct lpm_trie { size_t n_entries; size_t max_prefixlen; size_t data_size; - spinlock_t lock; + raw_spinlock_t lock; }; /* This trie implements a longest prefix match algorithm that can be used to @@ -349,7 +349,7 @@ static long trie_update_elem(struct bpf_map *map, if (!new_node) return -ENOMEM; - spin_lock_irqsave(&trie->lock, irq_flags); + raw_spin_lock_irqsave(&trie->lock, irq_flags); new_node->prefixlen = key->prefixlen; RCU_INIT_POINTER(new_node->child[0], NULL); @@ -450,7 +450,7 @@ static long trie_update_elem(struct bpf_map *map, rcu_assign_pointer(*slot, im_node); out: - spin_unlock_irqrestore(&trie->lock, irq_flags); + raw_spin_unlock_irqrestore(&trie->lock, irq_flags); migrate_disable(); if (ret) @@ -477,7 +477,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) if (key->prefixlen > trie->max_prefixlen) return -EINVAL; - spin_lock_irqsave(&trie->lock, irq_flags); + raw_spin_lock_irqsave(&trie->lock, irq_flags); /* Walk the tree looking for an exact key/length match and keeping * track of the path we traverse. We will need to know the node @@ -553,7 +553,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) free_node = node; out: - spin_unlock_irqrestore(&trie->lock, irq_flags); + raw_spin_unlock_irqrestore(&trie->lock, irq_flags); migrate_disable(); bpf_mem_cache_free_rcu(&trie->ma, free_parent); @@ -604,7 +604,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) offsetof(struct bpf_lpm_trie_key_u8, data); trie->max_prefixlen = trie->data_size * 8; - spin_lock_init(&trie->lock); + raw_spin_lock_init(&trie->lock); /* Allocate intermediate and leaf nodes from the same allocator */ leaf_size = sizeof(struct lpm_trie_node) + trie->data_size + From 3e18f5f1e5a152a51dbb6c234e2a0421aec11e31 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:21 +0800 Subject: [PATCH 361/366] selftests/bpf: Move test_lpm_map.c to map_tests Move test_lpm_map.c to map_tests/ to include LPM trie test cases in regular test_maps run. Most code remains unchanged, including the use of assert(). Only reduce n_lookups from 64K to 512, which decreases test_lpm_map runtime from 37s to 0.7s. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-9-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 +- .../lpm_trie_map_basic_ops.c} | 10 +++------- 3 files changed, 4 insertions(+), 9 deletions(-) rename tools/testing/selftests/bpf/{test_lpm_map.c => map_tests/lpm_trie_map_basic_ops.c} (99%) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c2a1842c3d8b1..e9c377001f931 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -5,7 +5,6 @@ bpf-syscall* test_verifier test_maps test_lru_map -test_lpm_map test_tag FEATURE-DUMP.libbpf FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ad3b1ba1920c..7eeb3cbe18c70 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -83,7 +83,7 @@ CLANG_CPUV4 := 1 endif # Order correspond to 'make run_tests' order -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c similarity index 99% rename from tools/testing/selftests/bpf/test_lpm_map.c rename to tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c index d98c72dc563ea..f375c89d78a45 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c @@ -223,7 +223,7 @@ static void test_lpm_map(int keysize) n_matches = 0; n_matches_after_delete = 0; n_nodes = 1 << 8; - n_lookups = 1 << 16; + n_lookups = 1 << 9; data = alloca(keysize); memset(data, 0, keysize); @@ -770,16 +770,13 @@ static void test_lpm_multi_thread(void) close(map_fd); } -int main(void) +void test_lpm_trie_map_basic_ops(void) { int i; /* we want predictable, pseudo random tests */ srand(0xf00ba1); - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - test_lpm_basic(); test_lpm_order(); @@ -792,6 +789,5 @@ int main(void) test_lpm_get_next_key(); test_lpm_multi_thread(); - printf("test_lpm: OK\n"); - return 0; + printf("%s: PASS\n", __func__); } From 04d4ce91b0bed4120e0c5fadc5291cebaa9c2a06 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 6 Dec 2024 19:06:22 +0800 Subject: [PATCH 362/366] selftests/bpf: Add more test cases for LPM trie Add more test cases for LPM trie in test_maps: 1) test_lpm_trie_update_flags It constructs various use cases for BPF_EXIST and BPF_NOEXIST and check whether the return value of update operation is expected. 2) test_lpm_trie_update_full_maps It tests the update operations on a full LPM trie map. Adding new node will fail and overwriting the value of existed node will succeed. 3) test_lpm_trie_iterate_strs and test_lpm_trie_iterate_ints There two test cases test whether the iteration through get_next_key is sorted and expected. These two test cases delete the minimal key after each iteration and check whether next iteration returns the second minimal key. The only difference between these two test cases is the former one saves strings in the LPM trie and the latter saves integers. Without the fix of get_next_key, these two cases will fail as shown below: test_lpm_trie_iterate_strs(1091):FAIL:iterate #2 got abc exp abS test_lpm_trie_iterate_ints(1142):FAIL:iterate #1 got 0x2 exp 0x1 Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241206110622.1161752-10-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../bpf/map_tests/lpm_trie_map_basic_ops.c | 395 ++++++++++++++++++ 1 file changed, 395 insertions(+) diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c index f375c89d78a45..d32e4edac9309 100644 --- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c @@ -20,10 +20,12 @@ #include #include #include +#include #include #include #include +#include #include "bpf_util.h" @@ -33,6 +35,22 @@ struct tlpm_node { uint8_t key[]; }; +struct lpm_trie_bytes_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned char data[8]; +}; + +struct lpm_trie_int_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned int data; +}; + static struct tlpm_node *tlpm_match(struct tlpm_node *list, const uint8_t *key, size_t n_bits); @@ -770,6 +788,378 @@ static void test_lpm_multi_thread(void) close(map_fd); } +static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int fd; + + opts.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries, + &opts); + CHECK(fd < 0, "bpf_map_create", "error %d\n", errno); + + return fd; +} + +static void test_lpm_trie_update_flags(void) +{ + struct lpm_trie_int_key key; + unsigned int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* overwrite an empty qp-trie (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add the same node as new node (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "add new elem again", "error %d\n", err); + + /* overwrite the existed node */ + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the node */ + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "update elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite a non-existent node which is the prefix of the first + * node (Error). + */ + key.prefixlen = 8; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + /* add a new node which is the prefix of the first node */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add another new node which will be the sibling of the first node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the third node */ + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* delete the second node to make it an intermediate node */ + key.prefixlen = 8; + key.data = 0; + err = bpf_map_delete_elem(fd, &key); + CHECK(err, "del elem", "error %d\n", err); + + /* overwrite the intermediate node (Error) */ + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + close(fd); +} + +static void test_lpm_trie_update_full_map(void) +{ + struct lpm_trie_int_key key; + int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 8; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* try to add more node (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err); + + /* update the value of an existed node with BPF_EXIST */ + key.prefixlen = 16; + key.data = 0; + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* update the value of an existed node with BPF_ANY */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + close(fd); +} + +static int cmp_str(const void *a, const void *b) +{ + const char *str_a = *(const char **)a, *str_b = *(const char **)b; + + return strcmp(str_a, str_b); +} + +/* Save strings in LPM trie. The trailing '\0' for each string will be + * accounted in the prefixlen. The strings returned during the iteration + * should be sorted as expected. + */ +static void test_lpm_trie_iterate_strs(void) +{ + static const char * const keys[] = { + "ab", "abO", "abc", "abo", "abS", "abcd", + }; + const char *sorted_keys[ARRAY_SIZE(keys)]; + struct lpm_trie_bytes_key key, next_key; + unsigned int value, got, i, j, len; + struct lpm_trie_bytes_key *cur; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys)); + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + unsigned int flags; + + /* add i-th element */ + flags = i % 2 ? BPF_NOEXIST : 0; + len = strlen(keys[i]); + /* include the trailing '\0' */ + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[i], len); + value = i + 100; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "add elem", "#%u error %d\n", i, err); + + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u error %d\n", i, err); + CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got); + + /* re-add i-th element (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err); + + /* Overwrite i-th element */ + flags = i % 2 ? 0 : BPF_EXIST; + value = i; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "update elem", "error %d\n", err); + + /* Lookup #[0~i] elements */ + for (j = 0; j <= i; j++) { + len = strlen(keys[j]); + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[j], len); + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err); + CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n", + i, j, value, got); + } + } + + /* Add element to a full qp-trie (Error) */ + key.prefixlen = sizeof(key.data) * 8; + memset(key.data, 0, sizeof(key.data)); + value = 0; + err = bpf_map_update_elem(fd, &key, &value, 0); + CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err); + + /* Iterate sorted elements: no deletion */ + memcpy(sorted_keys, keys, sizeof(keys)); + qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str); + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + /* Iterate sorted elements: delete the found key after each iteration */ + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + + err = bpf_map_delete_elem(fd, cur); + CHECK(err, "delete", "#%u error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err); + + close(fd); +} + +/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of + * LPM trie will return these integers in big-endian order, therefore, convert + * these integers to big-endian before update. After each iteration, delete the + * found key (the smallest integer) and expect the next iteration will return + * the second smallest number. + */ +static void test_lpm_trie_iterate_ints(void) +{ + struct lpm_trie_int_key key, next_key; + unsigned int i, max_entries; + struct lpm_trie_int_key *cur; + unsigned int *data_set; + int fd, err; + bool value; + + max_entries = 4096; + data_set = calloc(max_entries, sizeof(*data_set)); + CHECK(!data_set, "malloc", "no mem\n"); + for (i = 0; i < max_entries; i++) + data_set[i] = i; + + fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries); + value = true; + for (i = 0; i < max_entries; i++) { + key.prefixlen = 32; + key.data = htobe32(data_set[i]); + + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add elem", "#%u error %d\n", i, err); + } + + cur = NULL; + for (i = 0; i < max_entries; i++) { + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n", + i, next_key.prefixlen); + CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n", + i, be32toh(next_key.data), data_set[i]); + cur = &next_key; + + /* + * Delete the minimal key, the next call of bpf_get_next_key() + * will return the second minimal key. + */ + err = bpf_map_delete_elem(fd, &next_key); + CHECK(err, "del elem", "#%u elem error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + err = bpf_map_get_next_key(fd, NULL, &next_key); + CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err); + + free(data_set); + + close(fd); +} + void test_lpm_trie_map_basic_ops(void) { int i; @@ -789,5 +1179,10 @@ void test_lpm_trie_map_basic_ops(void) test_lpm_get_next_key(); test_lpm_multi_thread(); + test_lpm_trie_update_flags(); + test_lpm_trie_update_full_map(); + test_lpm_trie_iterate_strs(); + test_lpm_trie_iterate_ints(); + printf("%s: PASS\n", __func__); } From 492077668fb453b8b16c842fcf3fafc2ebc190e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 6 Dec 2024 08:20:06 -0800 Subject: [PATCH 363/366] x86/CPU/AMD: WARN when setting EFER.AUTOIBRS if and only if the WRMSR fails When ensuring EFER.AUTOIBRS is set, WARN only on a negative return code from msr_set_bit(), as '1' is used to indicate the WRMSR was successful ('0' indicates the MSR bit was already set). Fixes: 8cc68c9c9e92 ("x86/CPU/AMD: Make sure EFER[AIBRSE] is set") Reported-by: Nathan Chancellor Signed-off-by: Sean Christopherson Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/Z1MkNofJjt7Oq0G6@google.com Closes: https://lore.kernel.org/all/20241205220604.GA2054199@thelio-3990X --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d8408aafeed98..79d2e17f6582e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1065,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c) */ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && cpu_has(c, X86_FEATURE_AUTOIBRS)) - WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0); /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); From 7912405643a14b527cd4a4f33c1d4392da900888 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 1 Dec 2024 12:17:30 +0100 Subject: [PATCH 364/366] modpost: Add .irqentry.text to OTHER_SECTIONS The compiler can fully inline the actual handler function of an interrupt entry into the .irqentry.text entry point. If such a function contains an access which has an exception table entry, modpost complains about a section mismatch: WARNING: vmlinux.o(__ex_table+0x447c): Section mismatch in reference ... The relocation at __ex_table+0x447c references section ".irqentry.text" which is not in the list of authorized sections. Add .irqentry.text to OTHER_SECTIONS to cure the issue. Reported-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org # needed for linux-5.4-y Link: https://lore.kernel.org/all/20241128111844.GE10431@google.com/ Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 0584cbcdbd2d6..fb787a5715f5e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -772,7 +772,7 @@ static void check_section(const char *modname, struct elf_info *elf, ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ - ".coldtext", ".softirqentry.text" + ".coldtext", ".softirqentry.text", ".irqentry.text" #define ALL_TEXT_SECTIONS ".init.text", ".exit.text", \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS From d8d326d64f6702caab01ea0cd48f6c0054f3d1b4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 8 Dec 2024 16:56:45 +0900 Subject: [PATCH 365/366] kbuild: deb-pkg: fix build error with O= Since commit 13b25489b6f8 ("kbuild: change working directory to external module directory with M="), the Debian package build fails if a relative path is specified with the O= option. $ make O=build bindeb-pkg [ snip ] dpkg-deb: building package 'linux-image-6.13.0-rc1' in '../linux-image-6.13.0-rc1_6.13.0-rc1-6_amd64.deb'. Rebuilding host programs with x86_64-linux-gnu-gcc... make[6]: Entering directory '/home/masahiro/linux/build' /home/masahiro/linux/Makefile:190: *** specified kernel directory "build" does not exist. Stop. This occurs because the sub_make_done flag is cleared, even though the working directory is already in the output directory. Passing KBUILD_OUTPUT=. resolves the issue. Fixes: 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") Reported-by: Charlie Jenkins Closes: https://lore.kernel.org/all/Z1DnP-GJcfseyrM3@ghost/ Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 64d958ee45f38..d3c5b104c0631 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -69,7 +69,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # # Use the single-target build to avoid the modpost invocation, which # would overwrite Module.symvers. - "${MAKE}" HOSTCC="${CC}" KBUILD_EXTMOD="${destdir}" scripts/ + "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ cat <<-'EOF' > "${destdir}/scripts/Kbuild" subdir-y := basic @@ -78,7 +78,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then EOF # Run once again to rebuild scripts/basic/ and scripts/mod/modpost. - "${MAKE}" HOSTCC="${CC}" KBUILD_EXTMOD="${destdir}" scripts/ + "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ rm -f "${destdir}/Kbuild" "${destdir}/scripts/Kbuild" fi From fac04efc5c793dccbd07e2d59af9f90b7fc0dca4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Dec 2024 14:03:39 -0800 Subject: [PATCH 366/366] Linux 6.13-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 93ab62cef244f..64c594bd7ad03 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION*