From c8084a89bd91b05f51a36bff61f63a94c800b0d6 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Sun, 19 Jan 2025 16:43:12 +0800 Subject: [PATCH 001/503] Input: wdt87xx_i2c - fix compiler warning As reported by the kernel test robot, the following warning occurs: >> drivers/input/touchscreen/wdt87xx_i2c.c:1166:36: warning: 'wdt87xx_acpi_id' defined but not used [-Wunused-const-variable=] 1166 | static const struct acpi_device_id wdt87xx_acpi_id[] = { | ^~~~~~~~~~~~~~~ The 'wdt87xx_acpi_id' array is only used when CONFIG_ACPI is enabled. Wrapping its definition and 'MODULE_DEVICE_TABLE' in '#ifdef CONFIG_ACPI' prevents a compiler warning when ACPI is disabled. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501181549.uzdlBwuN-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Link: https://lore.kernel.org/r/20250119084312.1851486-1-eleanor15x@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wdt87xx_i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 27941245e962f..88d376090e6e6 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -1153,11 +1153,13 @@ static const struct i2c_device_id wdt87xx_dev_id[] = { }; MODULE_DEVICE_TABLE(i2c, wdt87xx_dev_id); +#ifdef CONFIG_ACPI static const struct acpi_device_id wdt87xx_acpi_id[] = { { "WDHT0001", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, wdt87xx_acpi_id); +#endif static struct i2c_driver wdt87xx_driver = { .probe = wdt87xx_ts_probe, From c9ccb88f534ca760d06590b67571c353a2f0cbcd Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Sat, 1 Feb 2025 12:43:24 +0100 Subject: [PATCH 002/503] Input: ads7846 - fix gpiod allocation commit 767d83361aaa ("Input: ads7846 - Convert to use software nodes") has simplified the code but accidentally converted a devm_gpiod_get() to gpiod_get(). 
This leaves the gpio reserved on module remove and the driver can no longer be loaded again. Fixes: 767d83361aaa ("Input: ads7846 - Convert to use software nodes") Cc: stable@vger.kernel.org Signed-off-by: H. Nikolaus Schaller Link: https://lore.kernel.org/r/6e9b143f19cdfda835711a8a7a3966e5a2494cff.1738410204.git.hns@goldelico.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 066dc04003fa8..67264c5b49cb4 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1021,7 +1021,7 @@ static int ads7846_setup_pendown(struct spi_device *spi, if (pdata->get_pendown_state) { ts->get_pendown_state = pdata->get_pendown_state; } else { - ts->gpio_pendown = gpiod_get(&spi->dev, "pendown", GPIOD_IN); + ts->gpio_pendown = devm_gpiod_get(&spi->dev, "pendown", GPIOD_IN); if (IS_ERR(ts->gpio_pendown)) { dev_err(&spi->dev, "failed to request pendown GPIO\n"); return PTR_ERR(ts->gpio_pendown); From 36e093c8dcc585d0a9e79a005f721f01f3365eba Mon Sep 17 00:00:00 2001 From: Nilton Perim Neto Date: Mon, 3 Feb 2025 07:13:09 -0800 Subject: [PATCH 003/503] Input: xpad - add 8BitDo SN30 Pro, Hyperkin X91 and Gamesir G7 SE controllers Add 8BitDo SN30 Pro, Hyperkin X91 and Gamesir G7 SE to the list of recognized controllers, and update vendor comments to match. 
Signed-off-by: Nilton Perim Neto Link: https://lore.kernel.org/r/20250122214814.102311-2-niltonperimneto@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8fe2a51df649e..652afb37bf770 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -376,8 +376,10 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -389,6 +391,7 @@ static const struct xpad_device { { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, + { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } @@ -528,12 +531,12 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ - XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ + XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ 
XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ - XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */ - XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */ - XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */ - XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ + XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */ + XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ + XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */ + XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ From 3492321e2e60ddfe91aa438bb9ac209016f48f7a Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Mon, 3 Feb 2025 07:22:27 -0800 Subject: [PATCH 004/503] Input: xpad - add multiple supported devices This is based on multiple commits at https://github.com/paroj/xpad that had bouncing email addresses and were not signed off. Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250123175404.23254-1-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 652afb37bf770..16493235bf9ee 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -140,6 +140,7 @@ static const struct xpad_device { { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, + { 0x044f, 0xd01e, "ThrustMaster, Inc. 
ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, @@ -177,6 +178,7 @@ static const struct xpad_device { { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, + { 0x0738, 0x4503, "Mad Catz Racing Wheel", 0, XTYPE_XBOXONE }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, @@ -238,6 +240,7 @@ static const struct xpad_device { { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, + { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, @@ -276,12 +279,15 @@ static const struct xpad_device { { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, + { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad 
Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, @@ -366,6 +372,7 @@ static const struct xpad_device { { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, + { 0x24c6, 0x581a, "ThrustMaster XB1 Classic Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, @@ -376,10 +383,12 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, - { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, 
"Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -387,12 +396,16 @@ static const struct xpad_device { { 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0603, "Nacon Pro Compact controller for Xbox", 0, XTYPE_XBOXONE }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0614, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, + { 0x3285, 0x0662, "Nacon Revolution5 Pro", 0, XTYPE_XBOX360 }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, + { 0x413d, 0x2104, "Black Shark Green Ghost Gamepad", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; @@ -491,6 +504,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ + XPAD_XBOXONE_VENDOR(0x044f), /* Thrustmaster Xbox One controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ @@ -537,11 +551,13 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ + XPAD_XBOXONE_VENDOR(0x2e95), /* SCUF Gaming Controller */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ 
XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ + XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */ { } }; @@ -694,7 +710,9 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), + XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), + XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), From a96d3e2beca0e51c8444d0a3b6b3ec484c4c5a8f Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 12 Jan 2025 01:08:11 +0100 Subject: [PATCH 005/503] iio: light: apds9306: fix max_scale_nano values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two provided max_scale_nano values must be multiplied by 100 and 10 respectively to achieve nano units. According to the comments: Max scale for apds9306 is 16.326432 → the fractional part is 0.326432, which is 326432000 in NANO. The current value is 3264320. Max scale for apds9306-065 is 14.09721 → the fractional part is 0.09712, which is 97120000 in NANO. The current value is 9712000. Update max_scale_nano initialization to use the right NANO fractional parts. 
Cc: stable@vger.kernel.org Fixes: 620d1e6c7a3f ("iio: light: Add support for APDS9306 Light Sensor") Signed-off-by: Javier Carrasco Tested-by: subhajit.ghosh@tweaklogic.com Link: https://patch.msgid.link/20250112-apds9306_nano_vals-v1-1-82fb145d0b16@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9306.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index 69a0d609cffc9..5ed7e17f49e76 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -108,11 +108,11 @@ static const struct part_id_gts_multiplier apds9306_gts_mul[] = { { .part_id = 0xB1, .max_scale_int = 16, - .max_scale_nano = 3264320, + .max_scale_nano = 326432000, }, { .part_id = 0xB3, .max_scale_int = 14, - .max_scale_nano = 9712000, + .max_scale_nano = 97120000, }, }; From 34d93804199fea23da2645dde35d0feb38a5d445 Mon Sep 17 00:00:00 2001 From: Victor Duicu Date: Fri, 17 Jan 2025 08:53:14 +0200 Subject: [PATCH 006/503] iio: adc: pac1921: Move ACPI_FREE() to cover all branches This patch moves ACPI_FREE() in pac1921_match_acpi_device() in order to cover all branches. 
Reported-by: Andy Shevchenko Fixes: 9fdf1d033316 ("iio: adc: pac1921: Add ACPI support to Microchip pac1921") Acked-by: Matteo Martelli Signed-off-by: Victor Duicu Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250117065314.4431-1-victor.duicu@microchip.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/pac1921.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/pac1921.c b/drivers/iio/adc/pac1921.c index 90f61c47b1c46..63f5182151565 100644 --- a/drivers/iio/adc/pac1921.c +++ b/drivers/iio/adc/pac1921.c @@ -1198,11 +1198,11 @@ static int pac1921_match_acpi_device(struct iio_dev *indio_dev) label = devm_kstrdup(dev, status->package.elements[0].string.pointer, GFP_KERNEL); + ACPI_FREE(status); if (!label) return -ENOMEM; indio_dev->label = label; - ACPI_FREE(status); return 0; } From aa5119c36d19639397d29ef305aa53a5ecd72b27 Mon Sep 17 00:00:00 2001 From: Nayab Sayed Date: Wed, 15 Jan 2025 11:37:04 +0530 Subject: [PATCH 007/503] iio: adc: at91-sama5d2_adc: fix sama7g5 realbits value The number of valid bits in SAMA7G5 ADC channel data register are 16. 
Hence changing the realbits value to 16 Fixes: 840bf6cb983f ("iio: adc: at91-sama5d2_adc: add support for sama7g5 device") Signed-off-by: Nayab Sayed Link: https://patch.msgid.link/20250115-fix-sama7g5-adc-realbits-v2-1-58a6e4087584@microchip.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 68 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 8e5aaf15a9215..c3a1dea2aa82e 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -329,7 +329,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { #define AT91_HWFIFO_MAX_SIZE_STR "128" #define AT91_HWFIFO_MAX_SIZE 128 -#define AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ +#define AT91_SAMA_CHAN_SINGLE(index, num, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .channel = num, \ @@ -337,7 +337,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 'u', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -350,7 +350,13 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } -#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ +#define AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 14) + +#define AT91_SAMA7G5_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 16) + +#define AT91_SAMA_CHAN_DIFF(index, num, num2, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .differential = 1, \ @@ -360,7 +366,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 's', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -373,6 +379,12 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } 
+#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 14) + +#define AT91_SAMA7G5_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 16) + #define AT91_SAMA5D2_CHAN_TOUCH(num, name, mod) \ { \ .type = IIO_POSITIONRELATIVE, \ @@ -666,30 +678,30 @@ static const struct iio_chan_spec at91_sama5d2_adc_channels[] = { }; static const struct iio_chan_spec at91_sama7g5_adc_channels[] = { - AT91_SAMA5D2_CHAN_SINGLE(0, 0, 0x60), - AT91_SAMA5D2_CHAN_SINGLE(1, 1, 0x64), - AT91_SAMA5D2_CHAN_SINGLE(2, 2, 0x68), - AT91_SAMA5D2_CHAN_SINGLE(3, 3, 0x6c), - AT91_SAMA5D2_CHAN_SINGLE(4, 4, 0x70), - AT91_SAMA5D2_CHAN_SINGLE(5, 5, 0x74), - AT91_SAMA5D2_CHAN_SINGLE(6, 6, 0x78), - AT91_SAMA5D2_CHAN_SINGLE(7, 7, 0x7c), - AT91_SAMA5D2_CHAN_SINGLE(8, 8, 0x80), - AT91_SAMA5D2_CHAN_SINGLE(9, 9, 0x84), - AT91_SAMA5D2_CHAN_SINGLE(10, 10, 0x88), - AT91_SAMA5D2_CHAN_SINGLE(11, 11, 0x8c), - AT91_SAMA5D2_CHAN_SINGLE(12, 12, 0x90), - AT91_SAMA5D2_CHAN_SINGLE(13, 13, 0x94), - AT91_SAMA5D2_CHAN_SINGLE(14, 14, 0x98), - AT91_SAMA5D2_CHAN_SINGLE(15, 15, 0x9c), - AT91_SAMA5D2_CHAN_DIFF(16, 0, 1, 0x60), - AT91_SAMA5D2_CHAN_DIFF(17, 2, 3, 0x68), - AT91_SAMA5D2_CHAN_DIFF(18, 4, 5, 0x70), - AT91_SAMA5D2_CHAN_DIFF(19, 6, 7, 0x78), - AT91_SAMA5D2_CHAN_DIFF(20, 8, 9, 0x80), - AT91_SAMA5D2_CHAN_DIFF(21, 10, 11, 0x88), - AT91_SAMA5D2_CHAN_DIFF(22, 12, 13, 0x90), - AT91_SAMA5D2_CHAN_DIFF(23, 14, 15, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(0, 0, 0x60), + AT91_SAMA7G5_CHAN_SINGLE(1, 1, 0x64), + AT91_SAMA7G5_CHAN_SINGLE(2, 2, 0x68), + AT91_SAMA7G5_CHAN_SINGLE(3, 3, 0x6c), + AT91_SAMA7G5_CHAN_SINGLE(4, 4, 0x70), + AT91_SAMA7G5_CHAN_SINGLE(5, 5, 0x74), + AT91_SAMA7G5_CHAN_SINGLE(6, 6, 0x78), + AT91_SAMA7G5_CHAN_SINGLE(7, 7, 0x7c), + AT91_SAMA7G5_CHAN_SINGLE(8, 8, 0x80), + AT91_SAMA7G5_CHAN_SINGLE(9, 9, 0x84), + AT91_SAMA7G5_CHAN_SINGLE(10, 10, 0x88), + AT91_SAMA7G5_CHAN_SINGLE(11, 11, 0x8c), + AT91_SAMA7G5_CHAN_SINGLE(12, 12, 0x90), + 
AT91_SAMA7G5_CHAN_SINGLE(13, 13, 0x94), + AT91_SAMA7G5_CHAN_SINGLE(14, 14, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(15, 15, 0x9c), + AT91_SAMA7G5_CHAN_DIFF(16, 0, 1, 0x60), + AT91_SAMA7G5_CHAN_DIFF(17, 2, 3, 0x68), + AT91_SAMA7G5_CHAN_DIFF(18, 4, 5, 0x70), + AT91_SAMA7G5_CHAN_DIFF(19, 6, 7, 0x78), + AT91_SAMA7G5_CHAN_DIFF(20, 8, 9, 0x80), + AT91_SAMA7G5_CHAN_DIFF(21, 10, 11, 0x88), + AT91_SAMA7G5_CHAN_DIFF(22, 12, 13, 0x90), + AT91_SAMA7G5_CHAN_DIFF(23, 14, 15, 0x98), IIO_CHAN_SOFT_TIMESTAMP(24), AT91_SAMA5D2_CHAN_TEMP(AT91_SAMA7G5_ADC_TEMP_CHANNEL, "temp", 0xdc), }; From 5d702aa2a47bbab6231382f9ead5be40a287a53b Mon Sep 17 00:00:00 2001 From: Dheeraj Reddy Jonnalagadda Date: Tue, 7 Jan 2025 18:05:10 +0530 Subject: [PATCH 008/503] iio: proximity: Fix use-after-free in hx9023s_send_cfg() Reorder the assignment of fw_size to happen before release_firmware() to avoid accessing the firmware structure after it's been freed. Fixes: e9ed97be4fcc ("iio: proximity: hx9023s: Added firmware file parsing functionality") Closes: https://scan7.scan.coverity.com/#/project-view/52337/11354?selectedIssue=1602791 Signed-off-by: Dheeraj Reddy Jonnalagadda Reviewed-by: David Lechner Link: https://patch.msgid.link/20250107123510.44978-1-dheeraj.linuxdev@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/hx9023s.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index e092a935dbac7..5aa8e5a22f326 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c @@ -1036,12 +1036,13 @@ static int hx9023s_send_cfg(const struct firmware *fw, struct hx9023s_data *data return -ENOMEM; memcpy(bin->data, fw->data, fw->size); - release_firmware(fw); bin->fw_size = fw->size; bin->fw_ver = bin->data[FW_VER_OFFSET]; bin->reg_count = get_unaligned_le16(bin->data + FW_REG_CNT_OFFSET); + release_firmware(fw); + return hx9023s_bin_load(data, bin); } From 
4eba4d92906c3814ca3ec65c16af27c46c12342e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 16 Dec 2024 10:05:53 +0000 Subject: [PATCH 009/503] iio: hid-sensor-prox: Split difference from multiple channels When the driver was originally created, it was decided that sampling_frequency and hysteresis would be shared_by_type instead of shared_by_all (even though it is internally shared by all). Eg: in_proximity_raw in_proximity_sampling_frequency When we introduced support for more channels, we continued with shared_by_type. Eg: in_proximity0_raw in_proximity1_raw in_proximity_sampling_frequency in_attention_raw in_attention_sampling_frequency Ideally we should change to shared_by_all, but it is not an option, because the current naming has become an established ABI by now. Luckily we can use separate instead. That will be more consistent: in_proximity0_raw in_proximity0_sampling_frequency in_proximity1_raw in_proximity1_sampling_frequency in_attention_raw in_attention_sampling_frequency Fixes: 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") Signed-off-by: Ricardo Ribalda Link: https://patch.msgid.link/20241216-fix-hid-sensor-v2-1-ff8c1959ec4a@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 7ab64f5c623c1..76b76d12b3882 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -49,9 +49,10 @@ static const u32 prox_sensitivity_addresses[] = { #define PROX_CHANNEL(_is_proximity, _channel) \ {\ .type = _is_proximity ? IIO_PROXIMITY : IIO_ATTENTION,\ - .info_mask_separate = _is_proximity ? BIT(IIO_CHAN_INFO_RAW) :\ - BIT(IIO_CHAN_INFO_PROCESSED),\ - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |\ + .info_mask_separate = \ + (_is_proximity ? 
BIT(IIO_CHAN_INFO_RAW) :\ + BIT(IIO_CHAN_INFO_PROCESSED)) |\ + BIT(IIO_CHAN_INFO_OFFSET) |\ BIT(IIO_CHAN_INFO_SCALE) |\ BIT(IIO_CHAN_INFO_SAMP_FREQ) |\ BIT(IIO_CHAN_INFO_HYSTERESIS),\ From 21d7241faf406e8aee3ce348451cc362d5db6a02 Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Fri, 24 Jan 2025 16:07:03 +0100 Subject: [PATCH 010/503] iio: adc: ad7192: fix channel select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channel configuration doesn't work as expected. For FIELD_PREP the bit mask is needed and not the bit number. Fixes: 874bbd1219c7 ("iio: adc: ad7192: Use bitfield access macros") Signed-off-by: Markus Burri Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250124150703.97848-1-markus.burri@mt.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index e96a5ae92375d..cfaf8f7e0a07d 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1084,7 +1084,7 @@ static int ad7192_update_scan_mode(struct iio_dev *indio_dev, const unsigned lon conf &= ~AD7192_CONF_CHAN_MASK; for_each_set_bit(i, scan_mask, 8) - conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, i); + conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, BIT(i)); ret = ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, conf); if (ret < 0) From e17b9f20da7d2bc1f48878ab2230523b2512d965 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Sat, 25 Jan 2025 17:24:32 +0100 Subject: [PATCH 011/503] iio: dac: ad3552r: clear reset status flag Clear reset status flag, to keep error status register clean after reset (ad3552r manual, rev B table 38). Reset error flag was left to 1, so debugging registers, the "Error Status Register" was dirty (0x01). It is important to clear this bit, so if there is any reset event over normal working mode, it is possible to detect it. 
Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250125-wip-bl-ad3552r-clear-reset-v2-1-aa3a27f3ff8c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index e7206af53af61..7944f5c1d264d 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -410,6 +410,12 @@ static int ad3552r_reset(struct ad3552r_desc *dac) return ret; } + /* Clear reset error flag, see ad3552r manual, rev B table 38. */ + ret = ad3552r_write_reg(dac, AD3552R_REG_ADDR_ERR_STATUS, + AD3552R_MASK_RESET_STATUS); + if (ret) + return ret; + return ad3552r_update_reg_field(dac, AD3552R_REG_ADDR_INTERFACE_CONFIG_A, AD3552R_MASK_ADDR_ASCENSION, From 02ccd7e5d81af4ae20852fc1ad67e7d943fa5778 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 29 Jan 2025 12:03:02 +0100 Subject: [PATCH 012/503] dt-bindings: iio: dac: adi-axi-adc: fix ad7606 pwm-names Fix make dt_binding_check warning: DTC [C] Documentation/devicetree/bindings/iio/adc/adi,axi-adc.example.dtb .../adc/adi,axi-adc.example.dtb: adc@0: pwm-names: ['convst1'] is too short from schema $id: http://devicetree.org/schemas/iio/adc/adi,ad7606.yaml# Add "minItems" to pwm-names, it allows to use one single pwm when connected to both adc conversion inputs. 
Fixes: 7c2357b10490 ("dt-bindings: iio: adc: ad7606: Add iio backend bindings") Signed-off-by: Angelo Dureghello Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250129-wip-bl-ad7606_add_backend_sw_mode-v3-1-c3aec77c0ab7@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index ab5881d0d017f..52d3f1ce33678 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -146,6 +146,7 @@ properties: maxItems: 2 pwm-names: + minItems: 1 items: - const: convst1 - const: convst2 From bead181694df16de464ca2392d0cec2cf15fb978 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 29 Jan 2025 12:03:04 +0100 Subject: [PATCH 013/503] iio: adc: ad7606: fix wrong scale available Fix wrong scale available list since only one value is returned: ... 
iio:device1: ad7606b (buffer capable) 8 channels found: voltage0: (input, index: 0, format: le:S16/16>>0) 2 channel-specific attributes found: attr 0: scale value: 0.305176 attr 1: scale_available value: 0.076293 Fix as: voltage0: (input, index: 0, format: le:S16/16>>0) 2 channel-specific attributes found: attr 0: scale value: 0.305176 attr 1: scale_available value: 0.076293 0.152588 0.305176 Fixes: 97c6d857041d ("iio: adc: ad7606: rework scale-available to be static") Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250129-wip-bl-ad7606_add_backend_sw_mode-v3-3-c3aec77c0ab7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index d8e3c7a43678c..d39354afd5394 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1047,7 +1047,7 @@ static int ad7606_read_avail(struct iio_dev *indio_dev, cs = &st->chan_scales[ch]; *vals = (int *)cs->scale_avail; - *length = cs->num_scales; + *length = cs->num_scales * 2; *type = IIO_VAL_INT_PLUS_MICRO; return IIO_AVAIL_LIST; From cc2c3540d9477a9931fb0fd851fcaeba524a5b35 Mon Sep 17 00:00:00 2001 From: Sam Winchenbach Date: Mon, 3 Feb 2025 13:34:34 +0000 Subject: [PATCH 014/503] iio: filter: admv8818: Force initialization of SDO When a weak pull-up is present on the SDO line, regmap_update_bits fails to write both the SOFTRESET and SDOACTIVE bits because it incorrectly reads them as already set. Since the soft reset disables the SDO line, performing a read-modify-write operation on ADI_SPI_CONFIG_A to enable the SDO line doesn't make sense. This change directly writes to the register instead of using regmap_update_bits. 
Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/SA1P110MB106904C961B0F3FAFFED74C0BCF5A@SA1P110MB1069.NAMP110.PROD.OUTLOOK.COM Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/filter/admv8818.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 848baa6e3bbf5..d85b7d3de8660 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -574,21 +574,15 @@ static int admv8818_init(struct admv8818_state *st) struct spi_device *spi = st->spi; unsigned int chip_id; - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SOFTRESET_N_MSK | - ADMV8818_SOFTRESET_MSK, - FIELD_PREP(ADMV8818_SOFTRESET_N_MSK, 1) | - FIELD_PREP(ADMV8818_SOFTRESET_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SOFTRESET_N_MSK | ADMV8818_SOFTRESET_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 Soft Reset failed.\n"); return ret; } - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SDOACTIVE_N_MSK | - ADMV8818_SDOACTIVE_MSK, - FIELD_PREP(ADMV8818_SDOACTIVE_N_MSK, 1) | - FIELD_PREP(ADMV8818_SDOACTIVE_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SDOACTIVE_N_MSK | ADMV8818_SDOACTIVE_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 SDO Enable failed.\n"); return ret; From 49f27f29446a5bfe633dd2cc0cfebd48a1a5e77f Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Fri, 31 Jan 2025 20:26:55 +0500 Subject: [PATCH 015/503] wifi: nl80211: reject cooked mode if it is set along with other flags It is possible to set both MONITOR_FLAG_COOK_FRAMES and MONITOR_FLAG_ACTIVE flags simultaneously on the same monitor interface from the userspace. 
This causes a sub-interface to be created with no IEEE80211_SDATA_IN_DRIVER bit set because the monitor interface is in the cooked state and it takes precedence over all other states. When the interface is then being deleted the kernel calls WARN_ONCE() from check_sdata_in_driver() because of missing that bit. Fix this by rejecting MONITOR_FLAG_COOK_FRAMES if it is set along with other flags. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 66f7ac50ed7c ("nl80211: Add monitor interface configuration flags") Cc: stable@vger.kernel.org Reported-by: syzbot+2e5c1e55b9e5c28a3da7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2e5c1e55b9e5c28a3da7 Signed-off-by: Vitaliy Shevtsov Link: https://patch.msgid.link/20250131152657.5606-1-v.shevtsov@mt-integration.ru Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7d3da0f6833d..fdb2aac951d18 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4220,6 +4220,11 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (flags[flag]) *mntrflags |= (1< Date: Tue, 4 Feb 2025 13:31:29 +0100 Subject: [PATCH 016/503] wifi: mac80211: Cleanup sta TXQs on flush Drop the sta TXQs on flush when the driver does not support flush. ieee80211_set_disassoc() tries to clean up everything for the sta. But it ignored queued frames in the sta TX queues when the driver isn't supporting the flush driver ops.
Signed-off-by: Alexander Wetzel Link: https://patch.msgid.link/20250204123129.9162-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f6b631faf4f7f..7f02bd5891eb9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -687,7 +687,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop) { - if (!local->ops->flush) + if (!local->ops->flush && !drop) return; /* @@ -714,7 +714,8 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } } - drv_flush(local, sdata, queues, drop); + if (local->ops->flush) + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, From 646262c71aca87bb66945933abe4e620796d6c5a Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Tue, 4 Feb 2025 17:42:40 +0100 Subject: [PATCH 017/503] wifi: mac80211: remove debugfs dir for virtual monitor Don't call ieee80211_debugfs_recreate_netdev() for virtual monitor interface when deleting it. The virtual monitor interface shouldn't have debugfs entries and trying to update them will *create* them on deletion. And when the virtual monitor interface is created/destroyed multiple times we'll get warnings about debugfs name conflicts. 
Signed-off-by: Alexander Wetzel Link: https://patch.msgid.link/20250204164240.370153-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 10 ++++++++-- net/mac80211/iface.c | 11 ++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 299d38e9e8630..2fc60e1e77a55 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -116,8 +116,14 @@ void drv_remove_interface(struct ieee80211_local *local, sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; - /* Remove driver debugfs entries */ - ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + /* + * Remove driver debugfs entries. + * The virtual monitor interface doesn't get a debugfs + * entry, so it's exempt here. + */ + if (sdata != local->monitor_sdata) + ieee80211_debugfs_recreate_netdev(sdata, + sdata->vif.valid_links); trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0ea7e77860b73..738de269e13f0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1206,16 +1206,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - RCU_INIT_POINTER(local->monitor_sdata, NULL); - mutex_unlock(&local->iflist_mtx); - - synchronize_net(); - + clear_bit(SDATA_STATE_RUNNING, &sdata->state); ieee80211_link_release_channel(&sdata->deflink); if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) drv_remove_interface(local, sdata); + RCU_INIT_POINTER(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + + synchronize_net(); + kfree(sdata); } From 7774e3920029398ad49dc848b23840593f14d515 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:45 +0200 Subject: [PATCH 018/503] wifi: iwlwifi: fw: allocate chained SG tables for dump The firmware dumps can be pretty big, and since we use single pages for each SG table entry, even the 
table itself may end up being an order-5 allocation. Build chained tables so that we need not allocate a higher-order table here. This could be improved and cleaned up, e.g. by using the SG pool code or simply kvmalloc(), but all of that would require also updating the devcoredump first since that frees it all, so we need to be more careful. SG pool might also run against the CONFIG_ARCH_NO_SG_CHAIN limitation, which is irrelevant here. Also use _devcd_free_sgtable() for the error paths now, much simpler especially since it's in two places now. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.697c7a465ac9.Iea982df46b5c075bfb77ade36f187d99a70c63db@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 86 ++++++++++++++------- 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index fb2ea38e89aca..6594216f873c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -558,41 +558,71 @@ static void iwl_dump_prph(struct iwl_fw_runtime *fwrt, } /* - * alloc_sgtable - allocates scallerlist table in the given size, - * fills it with pages and returns it + * alloc_sgtable - allocates (chained) scatterlist in the given size, + * fills it with pages and returns it * @size: the size (in bytes) of the table -*/ -static struct scatterlist *alloc_sgtable(int size) + */ +static struct scatterlist *alloc_sgtable(ssize_t size) { - int alloc_size, nents, i; - struct page *new_page; - struct scatterlist *iter; - struct scatterlist *table; + struct scatterlist *result = NULL, *prev; + int nents, i, n_prev; nents = DIV_ROUND_UP(size, PAGE_SIZE); - table = kcalloc(nents, sizeof(*table), GFP_KERNEL); - if (!table) - return NULL; - sg_init_table(table, nents); - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = 
alloc_page(GFP_KERNEL); - if (!new_page) { - /* release all previous allocated pages in the table */ - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = sg_page(iter); - if (new_page) - __free_page(new_page); - } - kfree(table); + +#define N_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(*result)) + /* + * We need an additional entry for table chaining, + * this ensures the loop can finish i.e. we can + * fit at least two entries per page (obviously, + * many more really fit.) + */ + BUILD_BUG_ON(N_ENTRIES_PER_PAGE < 2); + + while (nents > 0) { + struct scatterlist *new, *iter; + int n_fill, n_alloc; + + if (nents <= N_ENTRIES_PER_PAGE) { + /* last needed table */ + n_fill = nents; + n_alloc = nents; + nents = 0; + } else { + /* fill a page with entries */ + n_alloc = N_ENTRIES_PER_PAGE; + /* reserve one for chaining */ + n_fill = n_alloc - 1; + nents -= n_fill; + } + + new = kcalloc(n_alloc, sizeof(*new), GFP_KERNEL); + if (!new) { + if (result) + _devcd_free_sgtable(result); return NULL; } - alloc_size = min_t(int, size, PAGE_SIZE); - size -= PAGE_SIZE; - sg_set_page(iter, new_page, alloc_size, 0); + sg_init_table(new, n_alloc); + + if (!result) + result = new; + else + sg_chain(prev, n_prev, new); + prev = new; + n_prev = n_alloc; + + for_each_sg(new, iter, n_fill, i) { + struct page *new_page = alloc_page(GFP_KERNEL); + + if (!new_page) { + _devcd_free_sgtable(result); + return NULL; + } + + sg_set_page(iter, new_page, PAGE_SIZE, 0); + } } - return table; + + return result; } static void iwl_fw_get_prph_len(struct iwl_fw_runtime *fwrt, From 3f8aa0b8a53df2247a84eaf3b3aa38b6ef86cb1c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 9 Feb 2025 14:34:46 +0200 Subject: [PATCH 019/503] wifi: iwlwifi: fw: avoid using an uninitialized variable iwl_fwrt_read_err_table can return true also when it failed to read the memory. In this case, err_id argument is not initialized, but the callers are still using it. Simply initialize it to 0. 
If the error table was read successfully it'll be overridden. Fixes: 43e0b2ada519 ("wifi: iwlwifi: fw: add an error table status getter") Signed-off-by: Miri Korenblit Reviewed-by: Emmanuel Grumbach Link: https://patch.msgid.link/20250209143303.37cdbba4eb56.I95fe9bd95303b8179f946766558a9f15f4fe254c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c index 8e0c85a1240d7..c7b261c8ec969 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c @@ -540,6 +540,9 @@ bool iwl_fwrt_read_err_table(struct iwl_trans *trans, u32 base, u32 *err_id) } err_info = {}; int ret; + if (err_id) + *err_id = 0; + if (!base) return false; From f9751163bffd3fe60794929829f810968c6de73d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:47 +0200 Subject: [PATCH 020/503] wifi: iwlwifi: mvm: clean up ROC on failure If the firmware fails to start the session protection, then we do call iwl_mvm_roc_finished() here, but that won't do anything at all because IWL_MVM_STATUS_ROC_P2P_RUNNING was never set. Set IWL_MVM_STATUS_ROC_P2P_RUNNING in the failure/stop path. If it started successfully before, it's already set, so that doesn't matter, and if it didn't start it needs to be set to clean up. Not doing so will lead to a WARN_ON() later on a fresh remain-on-channel, since the link is already active when activated as it was never deactivated.
Fixes: 35c1bbd93c4e ("wifi: iwlwifi: mvm: remove IWL_MVM_STATUS_NEED_FLUSH_P2P") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.0fe36c291068.I67f5dac742170dd937f11e4d4f937f45f71b7cb4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 9216c43a35c4d..ebfa88b38b71b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1030,6 +1030,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, /* End TE, notify mac80211 */ mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID; mvmvif->time_event_data.link_id = -1; + /* set the bit so the ROC cleanup will actually clean up */ + set_bit(IWL_MVM_STATUS_ROC_P2P_RUNNING, &mvm->status); iwl_mvm_roc_finished(mvm); ieee80211_remain_on_channel_expired(mvm->hw); } else if (le32_to_cpu(notif->start)) { From d48ff3ce92259bae7e77732c7cfd7cbc7992c021 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:48 +0200 Subject: [PATCH 021/503] wifi: iwlwifi: mvm: don't dump the firmware state upon RFKILL while suspend This is not really a firmware error. We need to reload the firmware, but this doesn't mean that we should consider this as a firmware error. When the firmware was restarted upon resume, this wasn't felt by the driver. Now that we keep the firmware running during suspend even if we don't have wowlan, this started to pop-up. 
Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.a10463a40318.I14131781c3124b58e60e1f5e9d793a2bc88b464c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 77 ++++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 129b6bdf9ef90..82ca7f8b1bb27 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -3092,8 +3092,14 @@ static void iwl_mvm_d3_disconnect_iter(void *data, u8 *mac, ieee80211_resume_disconnect(vif); } -static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +enum rt_status { + FW_ALIVE, + FW_NEEDS_RESET, + FW_ERROR, +}; + +static enum rt_status iwl_mvm_check_rt_status(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { u32 err_id; @@ -3101,29 +3107,35 @@ static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[0], &err_id)) { - if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN && vif) { - struct cfg80211_wowlan_wakeup wakeup = { - .rfkill_release = true, - }; - ieee80211_report_wowlan_wakeup(vif, &wakeup, - GFP_KERNEL); + if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN) { + IWL_WARN(mvm, "Rfkill was toggled during suspend\n"); + if (vif) { + struct cfg80211_wowlan_wakeup wakeup = { + .rfkill_release = true, + }; + + ieee80211_report_wowlan_wakeup(vif, &wakeup, + GFP_KERNEL); + } + + return FW_NEEDS_RESET; } - return true; + return FW_ERROR; } /* check if we have lmac2 set and check for error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[1], NULL)) - return true; + return FW_ERROR; /* check for umac error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.umac_error_event_table, NULL)) - 
return true; + return FW_ERROR; - return false; + return FW_ALIVE; } /* @@ -3492,6 +3504,7 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) bool d0i3_first = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_D0I3_END_FIRST); bool resume_notif_based = iwl_mvm_d3_resume_notif_based(mvm); + enum rt_status rt_status; bool keep = false; mutex_lock(&mvm->mutex); @@ -3515,14 +3528,19 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, vif)) { - IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, vif); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } ret = 1; goto err; } @@ -3679,6 +3697,7 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) .notif_expected = IWL_D3_NOTIF_D3_END_NOTIF, }; + enum rt_status rt_status; int ret; lockdep_assert_held(&mvm->mutex); @@ -3688,14 +3707,20 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) mvm->last_reset_or_resume_time_jiffies = jiffies; iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, NULL)) { - IWL_ERR(mvm, "FW Error occurred during suspend. 
Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, NULL); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, + "iwl_mvm_check_rt_status failed, device is gone during suspend\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } mvm->trans->state = IWL_TRANS_NO_FW; ret = -ENODEV; From d73d2c6e3313f0ba60711ab4f4b9044eddca9ca5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:49 +0200 Subject: [PATCH 022/503] wifi: iwlwifi: mvm: don't try to talk to a dead firmware This fixes: bad state = 0 WARNING: CPU: 10 PID: 702 at drivers/net/wireless/intel/iwlwifi/iwl-trans.c:178 iwl_trans_send_cmd+0xba/0xe0 [iwlwifi] Call Trace: ? __warn+0xca/0x1c0 ? iwl_trans_send_cmd+0xba/0xe0 [iwlwifi 64fa9ad799a0e0d2ba53d4af93a53ad9a531f8d4] iwl_fw_dbg_clear_monitor_buf+0xd7/0x110 [iwlwifi 64fa9ad799a0e0d2ba53d4af93a53ad9a531f8d4] _iwl_dbgfs_fw_dbg_clear_write+0xe2/0x120 [iwlmvm 0e8adb18cea92d2c341766bcc10b18699290068a] Ask whether the firmware is alive before sending a command.
Fixes: 268712dc3b34 ("wifi: iwlwifi: mvm: add a debugfs hook to clear the monitor data") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.8e1597b62c70.I12ea71dd9b805b095c9fc12a10c9f34a4e801b61@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 83e3c11603622..55d035b896e91 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1479,6 +1479,13 @@ static ssize_t iwl_dbgfs_fw_dbg_clear_write(struct iwl_mvm *mvm, if (mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_9000) return -EOPNOTSUPP; + /* + * If the firmware is not running, silently succeed since there is + * no data to clear. + */ + if (!iwl_mvm_firmware_running(mvm)) + return count; + mutex_lock(&mvm->mutex); iwl_fw_dbg_clear_monitor_buf(&mvm->fwrt); mutex_unlock(&mvm->mutex); From a03e2082e678ea10d0d8bdf3ed933eb05a8ddbb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:50 +0200 Subject: [PATCH 023/503] wifi: iwlwifi: mvm: use the right version of the rate API The firmware uses the newer version of the API in recent devices. For older devices, we translate the rate to the new format. Don't parse the rate with old parsing macros. 
Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.13d70cdcbb4e.Ic92193bce4013b70a823cfef250ee79c16cf7c17@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 09fd8752046ee..14ea89f931bbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -995,7 +995,7 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, */ u8 ru = le32_get_bits(phy_data->d1, IWL_RX_PHY_DATA1_HE_RU_ALLOC_MASK); u32 rate_n_flags = phy_data->rate_n_flags; - u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK_V1; + u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; u8 offs = 0; rx_status->bw = RATE_INFO_BW_HE_RU; @@ -1050,13 +1050,13 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, if (he_mu) he_mu->flags2 |= - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW); - else if (he_type == RATE_MCS_HE_TYPE_TRIG_V1) + else if (he_type == RATE_MCS_HE_TYPE_TRIG) he->data6 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) | - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW); } From e0dc2c1bef722cbf16ae557690861e5f91208129 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:51 +0200 Subject: [PATCH 024/503] wifi: iwlwifi: limit printed string from FW file There's no guarantee here that the file is always with a NUL-termination, so reading the string may read beyond the end of the TLV. 
If that's the last TLV in the file, it can perhaps even read beyond the end of the file buffer. Fix that by limiting the print format to the size of the buffer we have. Fixes: aee1b6385e29 ("iwlwifi: support fseq tlv and print fseq version") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.cb5f9d0c2f5d.Idec695d53c6c2234aade306f7647b576c7e3d928@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index d3a65f33097cb..352b6e73e08f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1181,7 +1181,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, if (tlv_len != sizeof(*fseq_ver)) goto invalid_tlv_len; - IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %s\n", + IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %.32s\n", fseq_ver->version); } break; From 3b08e608d50c44ca1135beed179f266aa0461da7 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 9 Feb 2025 14:34:52 +0200 Subject: [PATCH 025/503] wifi: iwlwifi: Free pages allocated when failing to build A-MSDU When failing to prepare the data needed for A-MSDU transmission, the memory allocated for the TSO management was not freed. Fix it. 
Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.bc27fad9b3d5.Ibf43dd18fb652b1a59061204e081f11c9fa34a3f@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 1f483f15c2383..dce5096db82b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -347,6 +347,7 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans, return tfd; out_err: + iwl_pcie_free_tso_pages(trans, skb, out_meta); iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd); return NULL; } From 3640dbc1f75ce15d128ea4af44226960d894f3fd Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 9 Feb 2025 14:34:53 +0200 Subject: [PATCH 026/503] wifi: iwlwifi: Fix A-MSDU TSO preparation The TSO preparation assumed that the skb head contained the headers while the rest of the data was in the fragments. Since this is not always true, e.g., it is possible that the data was linearised, modify the TSO preparation to start the data processing after the network headers. 
Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.75769a4769bf.Iaf79e8538093cdf8c446c292cc96164ad6498f61@changeid Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/pcie/internal.h | 5 +++-- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 +++++++++++-------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 856b7e9f717d5..45460f93d24ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2003-2015, 2018-2024 Intel Corporation + * Copyright (C) 2003-2015, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -646,7 +646,8 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room); + u8 **hdr, unsigned int hdr_room, + unsigned int offset); void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index dce5096db82b6..401919f9fe88e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020, 2023-2024 Intel Corporation + * Copyright (C) 
2018-2020, 2023-2025 Intel Corporation */ #include #include @@ -188,7 +188,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len); if (!sgt) return -ENOMEM; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 334ebd4c12fa7..7b6071a59b694 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2003-2014, 2018-2021, 2023-2024 Intel Corporation + * Copyright (C) 2003-2014, 2018-2021, 2023-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1855,6 +1855,7 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, * @cmd_meta: command meta to store the scatter list information for unmapping * @hdr: output argument for TSO headers * @hdr_room: requested length for TSO headers + * @offset: offset into the data from which mapping should start * * Allocate space for a scatter gather list and TSO headers and map the SKB * using the scatter gather list. 
The SKB is unmapped again when the page is @@ -1864,18 +1865,20 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, */ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room) + u8 **hdr, unsigned int hdr_room, + unsigned int offset) { struct sg_table *sgt; + unsigned int n_segments; if (WARN_ON_ONCE(skb_has_frag_list(skb))) return NULL; + n_segments = DIV_ROUND_UP(skb->len - offset, skb_shinfo(skb)->gso_size); *hdr = iwl_pcie_get_page_hdr(trans, hdr_room + __alignof__(struct sg_table) + sizeof(struct sg_table) + - (skb_shinfo(skb)->nr_frags + 1) * - sizeof(struct scatterlist), + n_segments * sizeof(struct scatterlist), skb); if (!*hdr) return NULL; @@ -1883,11 +1886,11 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sgt = (void *)PTR_ALIGN(*hdr + hdr_room, __alignof__(struct sg_table)); sgt->sgl = (void *)(sgt + 1); - sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); + sg_init_table(sgt->sgl, n_segments); /* Only map the data, not the header (it is copied to the TSO page) */ - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), - skb->data_len); + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, + skb->len - offset); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1939,7 +1942,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len + iv_len); if (!sgt) return -ENOMEM; From 59115e2e25f42924181055ed7cc1d123af7598b7 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 16 Jan 2025 06:12:24 +0000 Subject: [PATCH 027/503] x86/hyperv/vtl: Stop kernel from probing 
VTL0 low memory For Linux, running in Hyper-V VTL (Virtual Trust Level), kernel in VTL2 tries to access VTL0 low memory in probe_roms. This memory is not described in the e820 map. Initialize probe_roms call to no-ops during boot for VTL2 kernel to avoid this. The issue got identified in OpenVMM which detects invalid accesses initiated from kernel running in VTL2. Co-developed-by: Saurabh Sengar Signed-off-by: Saurabh Sengar Signed-off-by: Naman Jain Tested-by: Roman Kisel Reviewed-by: Roman Kisel Link: https://lore.kernel.org/r/20250116061224.1701-1-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250116061224.1701-1-namjain@linux.microsoft.com> --- arch/x86/hyperv/hv_vtl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 4e1b1e3b56584..3f4e20d7b724b 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -30,6 +30,7 @@ void __init hv_vtl_init_platform(void) x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; From 99e6ea912340d6a262a60d5dd0c87c5e7b2d6ff2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 7 Feb 2025 13:21:45 +0100 Subject: [PATCH 028/503] spi: atmel-quadspi: remove references to runtime PM on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to call runtime PM put APIs on error path of `atmel_qspi_sama7g5_transfer()` as the caller (`atmel_qspi_exec_op()`) of it will take care of this if needed. 
Fixes: 5af42209a4d2 ("spi: atmel-quadspi: Add support for sama7g5 QSPI") Signed-off-by: Claudiu Beznea Signed-off-by: Durai Manickam KR Reported-by: Alexander Dahl Closes: https://lore.kernel.org/linux-spi/20250109-carat-festivity-5f088e1add3c@thorsis.com/ [ csokas.bence: Rebase and clarify msg, fix/add tags ] Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250207122145.162183-2-csokas.bence@prolan.hu Signed-off-by: Mark Brown --- drivers/spi/atmel-quadspi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index d8c9be64d006a..244ac01068629 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -930,11 +930,8 @@ static int atmel_qspi_sama7g5_transfer(struct spi_mem *mem, /* Release the chip-select. */ ret = atmel_qspi_reg_sync(aq); - if (ret) { - pm_runtime_mark_last_busy(&aq->pdev->dev); - pm_runtime_put_autosuspend(&aq->pdev->dev); + if (ret) return ret; - } atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR); return atmel_qspi_wait_for_completion(aq, QSPI_SR_CSRA); From 3d7a20f9ba7b09a35df4bdb5f0ddb2a0c8a4f39e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 13 Feb 2025 08:03:41 +0000 Subject: [PATCH 029/503] MAINTAINERS: add tambarus as R for Samsung SPI I'm currently working on a Samsung SoC which includes SPI. I'd like to be Cc'ed to further contributions and help on reviewing them. Add me as reviewer. 
Signed-off-by: Tudor Ambarus Acked-by: Andi Shyti Reviewed-by: Sam Protsenko Link: https://patch.msgid.link/20250213-gs101-spi-r-v1-1-1e3ab8096873@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..3a5d1d5891e74 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21075,6 +21075,7 @@ F: include/linux/clk/samsung.h SAMSUNG SPI DRIVERS M: Andi Shyti +R: Tudor Ambarus L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained From 7103f0589ac220eac3d2b1e8411494b31b883d06 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 13:14:34 +0100 Subject: [PATCH 030/503] x86/microcode/AMD: Remove ugly linebreak in __verify_patch_section() signature No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-2-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a5dac7f3c0a07..4a62625c311a6 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -246,8 +246,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size) * On success, @sh_psize returns the patch size according to the section header, * to the caller. 
*/ -static bool -__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) +static bool __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) { u32 p_type, p_size; const u32 *hdr; From 3ef0740d10b005a45e8ae5b4b7b5d37bfddf63c0 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:23:47 +0100 Subject: [PATCH 031/503] x86/microcode/AMD: Remove unused save_microcode_in_initrd_amd() declarations Commit a7939f016720 ("x86/microcode/amd: Cache builtin/initrd microcode early") renamed it to save_microcode_in_initrd() and made it static. Zap the forgotten declarations. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-3-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- arch/x86/kernel/cpu/microcode/internal.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4a62625c311a6..f831c06029948 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -517,7 +517,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) * patch container file in initrd, traverse equivalent cpu table, look for a * matching microcode patch, and update, all in initrd memory in place. * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * and on 32-bit during save_microcode_in_initrd() -- we can call * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. 
* diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 21776c529fa97..5df621752fefa 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -100,14 +100,12 @@ extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); -int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } -static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } static inline struct microcode_ops *init_amd_microcode(void) { return NULL; } static inline void exit_amd_microcode(void) { } From dc15675074dcfd79a2f10a6e39f96b0244961a01 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:46:45 +0100 Subject: [PATCH 032/503] x86/microcode/AMD: Merge early_apply_microcode() into its single callsite No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-4-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 60 +++++++++++++---------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index f831c06029948..90f93b3ca9dbb 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -512,39 +512,6 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) return true; } -/* - * Early load occurs before we can vmalloc(). 
So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. - * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. - * - * Returns true if container found (sets @desc), false otherwise. - */ -static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size) -{ - struct cont_desc desc = { 0 }; - struct microcode_amd *mc; - - scan_containers(ucode, size, &desc); - - mc = desc.mc; - if (!mc) - return false; - - /* - * Allow application of the same revision to pick up SMT-specific - * changes even if the revision of the other SMT thread is already - * up-to-date. - */ - if (old_rev > mc->hdr.patch_id) - return false; - - return __apply_microcode_amd(mc, desc.psize); -} - static bool get_builtin_microcode(struct cpio_data *cp) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; @@ -582,8 +549,19 @@ static bool __init find_blobs_in_containers(struct cpio_data *ret) return found; } +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. 
+ */ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { + struct cont_desc desc = { }; + struct microcode_amd *mc; struct cpio_data cp = { }; u32 dummy; @@ -597,7 +575,21 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ if (!find_blobs_in_containers(&cp)) return; - if (early_apply_microcode(ed->old_rev, cp.data, cp.size)) + scan_containers(cp.data, cp.size, &desc); + + mc = desc.mc; + if (!mc) + return; + + /* + * Allow application of the same revision to pick up SMT-specific + * changes even if the revision of the other SMT thread is already + * up-to-date. + */ + if (ed->old_rev > mc->hdr.patch_id) + return; + + if (__apply_microcode_amd(mc, desc.psize)) native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } From b39c387164879eef71886fc93cee5ca7dd7bf500 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:51:37 +0100 Subject: [PATCH 033/503] x86/microcode/AMD: Get rid of the _load_microcode_amd() forward declaration Simply move save_microcode_in_initrd() down. No functional changes. 
Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-5-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 54 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 90f93b3ca9dbb..adfea4d0d1297 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -593,34 +593,6 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } -static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size); - -static int __init save_microcode_in_initrd(void) -{ - unsigned int cpuid_1_eax = native_cpuid_eax(1); - struct cpuinfo_x86 *c = &boot_cpu_data; - struct cont_desc desc = { 0 }; - enum ucode_state ret; - struct cpio_data cp; - - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) - return 0; - - if (!find_blobs_in_containers(&cp)) - return -EINVAL; - - scan_containers(cp.data, cp.size, &desc); - if (!desc.mc) - return -EINVAL; - - ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret > UCODE_UPDATED) - return -EINVAL; - - return 0; -} -early_initcall(save_microcode_in_initrd); - static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n, bool ignore_stepping) @@ -1004,6 +976,32 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz return ret; } +static int __init save_microcode_in_initrd(void) +{ + unsigned int cpuid_1_eax = native_cpuid_eax(1); + struct cpuinfo_x86 *c = &boot_cpu_data; + struct cont_desc desc = { 0 }; + enum ucode_state ret; + struct cpio_data cp; + + if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + return 0; + + if (!find_blobs_in_containers(&cp)) + return -EINVAL; + + scan_containers(cp.data, cp.size, &desc); 
+ if (!desc.mc) + return -EINVAL; + + ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); + if (ret > UCODE_UPDATED) + return -EINVAL; + + return 0; +} +early_initcall(save_microcode_in_initrd); + /* * AMD microcode firmware naming convention, up to family 15h they are in * the legacy file: From 037e81fb9d2dfe7b31fd97e5f578854e38f09887 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 13:02:32 +0100 Subject: [PATCH 034/503] x86/microcode/AMD: Add get_patch_level() Put the MSR_AMD64_PATCH_LEVEL reading of the current microcode revision the hw has, into a separate function. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-6-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 46 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index adfea4d0d1297..31f90e129b083 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -145,6 +145,15 @@ ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; */ static u32 bsp_cpuid_1_eax __ro_after_init; +static u32 get_patch_level(void) +{ + u32 rev, dummy __always_unused; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + return rev; +} + static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val) { union zen_patch_rev p; @@ -483,10 +492,10 @@ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) } } -static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) +static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, + unsigned int psize) { unsigned long p_addr = (unsigned long)&mc->hdr.data_code; - u32 rev, dummy; native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); @@ -504,9 +513,8 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) } /* verify patch 
application was successful */ - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - if (rev != mc->hdr.patch_id) + *cur_rev = get_patch_level(); + if (*cur_rev != mc->hdr.patch_id) return false; return true; @@ -563,11 +571,12 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ struct cont_desc desc = { }; struct microcode_amd *mc; struct cpio_data cp = { }; - u32 dummy; + u32 rev; bsp_cpuid_1_eax = cpuid_1_eax; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); + rev = get_patch_level(); + ed->old_rev = rev; /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -589,8 +598,8 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ if (ed->old_rev > mc->hdr.patch_id) return; - if (__apply_microcode_amd(mc, desc.psize)) - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); + if (__apply_microcode_amd(mc, &rev, desc.psize)) + ed->new_rev = rev; } static inline bool patch_cpus_equivalent(struct ucode_patch *p, @@ -692,14 +701,9 @@ static void free_cache(void) static struct ucode_patch *find_patch(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u32 rev, dummy __always_unused; u16 equiv_id = 0; - /* fetch rev if not populated yet: */ - if (!uci->cpu_sig.rev) { - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - uci->cpu_sig.rev = rev; - } + uci->cpu_sig.rev = get_patch_level(); if (x86_family(bsp_cpuid_1_eax) < 0x17) { equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); @@ -722,22 +726,20 @@ void reload_ucode_amd(unsigned int cpu) mc = p->data; - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - + rev = get_patch_level(); if (rev < mc->hdr.patch_id) { - if (__apply_microcode_amd(mc, p->size)) - pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); + if (__apply_microcode_amd(mc, &rev, p->size)) + pr_info_once("reload revision: 0x%08x\n", rev); } } static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { - struct cpuinfo_x86 *c = 
&cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct ucode_patch *p; csig->sig = cpuid_eax(0x00000001); - csig->rev = c->microcode; + csig->rev = get_patch_level(); /* * a patch could have been loaded early, set uci->mc so that @@ -778,7 +780,7 @@ static enum ucode_state apply_microcode_amd(int cpu) goto out; } - if (!__apply_microcode_amd(mc_amd, p->size)) { + if (!__apply_microcode_amd(mc_amd, &rev, p->size)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; From 0c28e4d1e10d2aae608094620bb386e6fd73d55e Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Thu, 13 Feb 2025 13:38:49 +0000 Subject: [PATCH 035/503] HID: corsair-void: Update power supply values with a unified work handler corsair_void_process_receiver can be called from an interrupt context, locking battery_mutex in it was causing a kernel panic. Fix it by moving the critical section into its own work, sharing this work with battery_add_work and battery_remove_work to remove the need for any locking Closes: https://bugzilla.suse.com/show_bug.cgi?id=1236843 Fixes: 6ea2a6fd3872 ("HID: corsair-void: Add Corsair Void headset family driver") Cc: stable@vger.kernel.org Signed-off-by: Stuart Hayhurst Reviewed-by: Jiri Slaby Signed-off-by: Jiri Kosina --- drivers/hid/hid-corsair-void.c | 83 ++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c index 56e858066c3c3..afbd67aa97192 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -71,11 +71,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -120,6 +118,12 @@ enum { CORSAIR_VOID_BATTERY_CHARGING = 5, }; +enum { + CORSAIR_VOID_ADD_BATTERY = 0, + CORSAIR_VOID_REMOVE_BATTERY = 1, + CORSAIR_VOID_UPDATE_BATTERY = 2, +}; + static enum power_supply_property corsair_void_battery_props[] = { 
POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -155,12 +159,12 @@ struct corsair_void_drvdata { struct power_supply *battery; struct power_supply_desc battery_desc; - struct mutex battery_mutex; struct delayed_work delayed_status_work; struct delayed_work delayed_firmware_work; - struct work_struct battery_remove_work; - struct work_struct battery_add_work; + + unsigned long battery_work_flags; + struct work_struct battery_work; }; /* @@ -260,11 +264,9 @@ static void corsair_void_process_receiver(struct corsair_void_drvdata *drvdata, /* Inform power supply if battery values changed */ if (memcmp(&orig_battery_data, battery_data, sizeof(*battery_data))) { - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_changed(drvdata->battery); - } - } + set_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); } } @@ -536,29 +538,11 @@ static void corsair_void_firmware_work_handler(struct work_struct *work) } -static void corsair_void_battery_remove_work_handler(struct work_struct *work) -{ - struct corsair_void_drvdata *drvdata; - - drvdata = container_of(work, struct corsair_void_drvdata, - battery_remove_work); - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_unregister(drvdata->battery); - drvdata->battery = NULL; - } - } -} - -static void corsair_void_battery_add_work_handler(struct work_struct *work) +static void corsair_void_add_battery(struct corsair_void_drvdata *drvdata) { - struct corsair_void_drvdata *drvdata; struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; - drvdata = container_of(work, struct corsair_void_drvdata, - battery_add_work); - guard(mutex)(&drvdata->battery_mutex); if (drvdata->battery) return; @@ -583,16 +567,42 @@ static void corsair_void_battery_add_work_handler(struct work_struct *work) drvdata->battery = new_supply; } +static void corsair_void_battery_work_handler(struct work_struct 
*work) +{ + struct corsair_void_drvdata *drvdata = container_of(work, + struct corsair_void_drvdata, battery_work); + + bool add_battery = test_and_clear_bit(CORSAIR_VOID_ADD_BATTERY, + &drvdata->battery_work_flags); + bool remove_battery = test_and_clear_bit(CORSAIR_VOID_REMOVE_BATTERY, + &drvdata->battery_work_flags); + bool update_battery = test_and_clear_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + + if (add_battery && !remove_battery) { + corsair_void_add_battery(drvdata); + } else if (remove_battery && !add_battery && drvdata->battery) { + power_supply_unregister(drvdata->battery); + drvdata->battery = NULL; + } + + if (update_battery && drvdata->battery) + power_supply_changed(drvdata->battery); + +} + static void corsair_void_headset_connected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_add_work); + set_bit(CORSAIR_VOID_ADD_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); schedule_delayed_work(&drvdata->delayed_firmware_work, msecs_to_jiffies(100)); } static void corsair_void_headset_disconnected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_remove_work); + set_bit(CORSAIR_VOID_REMOVE_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); corsair_void_set_unknown_wireless_data(drvdata); corsair_void_set_unknown_batt(drvdata); @@ -678,13 +688,7 @@ static int corsair_void_probe(struct hid_device *hid_dev, drvdata->battery_desc.get_property = corsair_void_battery_get_property; drvdata->battery = NULL; - INIT_WORK(&drvdata->battery_remove_work, - corsair_void_battery_remove_work_handler); - INIT_WORK(&drvdata->battery_add_work, - corsair_void_battery_add_work_handler); - ret = devm_mutex_init(drvdata->dev, &drvdata->battery_mutex); - if (ret) - return ret; + INIT_WORK(&drvdata->battery_work, corsair_void_battery_work_handler); ret = sysfs_create_group(&hid_dev->dev.kobj, &corsair_void_attr_group); if (ret) @@ -721,8 +725,7 
@@ static void corsair_void_remove(struct hid_device *hid_dev) struct corsair_void_drvdata *drvdata = hid_get_drvdata(hid_dev); hid_hw_stop(hid_dev); - cancel_work_sync(&drvdata->battery_remove_work); - cancel_work_sync(&drvdata->battery_add_work); + cancel_work_sync(&drvdata->battery_work); if (drvdata->battery) power_supply_unregister(drvdata->battery); From 44afc10d4678d5a3a4ab8c25750be00f037298cf Mon Sep 17 00:00:00 2001 From: Ryan McClelland Date: Thu, 16 Jan 2025 22:49:24 -0800 Subject: [PATCH 036/503] HID: nintendo: fix gencon button events map This fixes the button event map to match the 3-button recommendation as well as the redundant 'z' in the button map events for the Sega MD/Gen 6 Button. Signed-off-by: Ryan McClelland Reviewed-by: Daniel J. Ogorchock Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 11ac246176ae1..839d5bcd72b1e 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -457,13 +457,13 @@ static const struct joycon_ctlr_button_mapping snescon_button_mappings[] = { }; static const struct joycon_ctlr_button_mapping gencon_button_mappings[] = { - { BTN_A, JC_BTN_A, }, - { BTN_B, JC_BTN_B, }, - { BTN_C, JC_BTN_R, }, - { BTN_X, JC_BTN_X, }, /* MD/GEN 6B Only */ - { BTN_Y, JC_BTN_Y, }, /* MD/GEN 6B Only */ - { BTN_Z, JC_BTN_L, }, /* MD/GEN 6B Only */ - { BTN_SELECT, JC_BTN_ZR, }, + { BTN_WEST, JC_BTN_A, }, /* A */ + { BTN_SOUTH, JC_BTN_B, }, /* B */ + { BTN_EAST, JC_BTN_R, }, /* C */ + { BTN_TL, JC_BTN_X, }, /* X MD/GEN 6B Only */ + { BTN_NORTH, JC_BTN_Y, }, /* Y MD/GEN 6B Only */ + { BTN_TR, JC_BTN_L, }, /* Z MD/GEN 6B Only */ + { BTN_SELECT, JC_BTN_ZR, }, /* Mode */ { BTN_START, JC_BTN_PLUS, }, { BTN_MODE, JC_BTN_HOME, }, { BTN_Z, JC_BTN_CAP, }, From 4bd0725c09f377ffaf22b834241f6c050742e4fc Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Tue, 18 Feb 2025 00:50:13 +0800 
Subject: [PATCH 037/503] HID: google: fix unused variable warning under !CONFIG_ACPI As reported by the kernel test robot, the following warning occurs: >> drivers/hid/hid-google-hammer.c:261:36: warning: 'cbas_ec_acpi_ids' defined but not used [-Wunused-const-variable=] 261 | static const struct acpi_device_id cbas_ec_acpi_ids[] = { | ^~~~~~~~~~~~~~~~ The 'cbas_ec_acpi_ids' array is only used when CONFIG_ACPI is enabled. Wrapping its definition and 'MODULE_DEVICE_TABLE' in '#ifdef CONFIG_ACPI' prevents a compiler warning when ACPI is disabled. Fixes: eb1aac4c8744f75 ("HID: google: add support tablet mode switch for Whiskers") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501201141.jctFH5eB-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 0f292b5d3e26d..eb6fd2dc75d0a 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -268,11 +268,13 @@ static void cbas_ec_remove(struct platform_device *pdev) mutex_unlock(&cbas_ec_reglock); } +#ifdef CONFIG_ACPI static const struct acpi_device_id cbas_ec_acpi_ids[] = { { "GOOG000B", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, cbas_ec_acpi_ids); +#endif #ifdef CONFIG_OF static const struct of_device_id cbas_ec_of_match[] = { From 823987841424289339fdb4ba90e6d2c3792836db Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Tue, 18 Feb 2025 14:37:29 +0800 Subject: [PATCH 038/503] HID: intel-ish-hid: Fix use-after-free issue in hid_ishtp_cl_remove() During the `rmmod` operation for the `intel_ishtp_hid` driver, a use-after-free issue can occur in the hid_ishtp_cl_remove() function. The function hid_ishtp_cl_deinit() is called before ishtp_hid_remove(), which can lead to accessing freed memory or resources during the removal process. Call Trace: ? ishtp_cl_send+0x168/0x220 [intel_ishtp] ? 
hid_output_report+0xe3/0x150 [hid] hid_ishtp_set_feature+0xb5/0x120 [intel_ishtp_hid] ishtp_hid_request+0x7b/0xb0 [intel_ishtp_hid] hid_hw_request+0x1f/0x40 [hid] sensor_hub_set_feature+0x11f/0x190 [hid_sensor_hub] _hid_sensor_power_state+0x147/0x1e0 [hid_sensor_trigger] hid_sensor_runtime_resume+0x22/0x30 [hid_sensor_trigger] sensor_hub_remove+0xa8/0xe0 [hid_sensor_hub] hid_device_remove+0x49/0xb0 [hid] hid_destroy_device+0x6f/0x90 [hid] ishtp_hid_remove+0x42/0x70 [intel_ishtp_hid] hid_ishtp_cl_remove+0x6b/0xb0 [intel_ishtp_hid] ishtp_cl_device_remove+0x4a/0x60 [intel_ishtp] ... Additionally, ishtp_hid_remove() is a HID level power off, which should occur before the ISHTP level disconnect. This patch resolves the issue by reordering the calls in hid_ishtp_cl_remove(). The function ishtp_hid_remove() is now called before hid_ishtp_cl_deinit(). Fixes: f645a90e8ff7 ("HID: intel-ish-hid: ishtp-hid-client: use helper functions for connection") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index cb04cd1d980bd..6550ad5bfbb53 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -832,9 +832,9 @@ static void hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device) hid_ishtp_cl); dev_dbg(ishtp_device(cl_device), "%s\n", __func__); - hid_ishtp_cl_deinit(hid_ishtp_cl); ishtp_put_device(cl_device); ishtp_hid_remove(client_data); + hid_ishtp_cl_deinit(hid_ishtp_cl); hid_ishtp_cl = NULL; From 07583a0010696a17fb0942e0b499a62785c5fc9f Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Tue, 18 Feb 2025 14:37:30 +0800 Subject: [PATCH 039/503] HID: intel-ish-hid: Fix use-after-free issue in ishtp_hid_remove() The system can experience a random crash a few minutes after the driver is removed. 
This issue occurs due to improper handling of memory freeing in the ishtp_hid_remove() function. The function currently frees the `driver_data` directly within the loop that destroys the HID devices, which can lead to accessing freed memory. Specifically, `hid_destroy_device()` uses `driver_data` when it calls `hid_ishtp_set_feature()` to power off the sensor, so freeing `driver_data` beforehand can result in accessing invalid memory. This patch resolves the issue by storing the `driver_data` in a temporary variable before calling `hid_destroy_device()`, and then freeing the `driver_data` after the device is destroyed. Fixes: 0b28cb4bcb17 ("HID: intel-ish-hid: ISH HID client driver") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c index 00c6f0ebf3563..be2c62fc8251d 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid.c @@ -261,12 +261,14 @@ int ishtp_hid_probe(unsigned int cur_hid_dev, */ void ishtp_hid_remove(struct ishtp_cl_data *client_data) { + void *data; int i; for (i = 0; i < client_data->num_hid_devices; ++i) { if (client_data->hid_sensor_hubs[i]) { - kfree(client_data->hid_sensor_hubs[i]->driver_data); + data = client_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(client_data->hid_sensor_hubs[i]); + kfree(data); client_data->hid_sensor_hubs[i] = NULL; } } From a8e8ffcc3afce2ee5fb70162aeaef3f03573ee1e Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 9 Feb 2025 13:05:50 +0200 Subject: [PATCH 040/503] mei: me: add panther lake P DID Add Panther Lake P device id. 
Cc: stable Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250209110550.1582982-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c3a6657dcd4a2..a5f88ec97df75 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,8 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 6589635f8ba32..d6ff9d82ae94b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; From fdb1ada57cf8b8752cdf54f08709d76d74999544 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 14 Feb 2025 22:24:25 +0100 Subject: [PATCH 041/503] mei: vsc: Use "wakeuphostint" when getting the host wakeup GPIO The _CRS ACPI resources table has 2 entries for the host wakeup GPIO, the first one being a regular GpioIo () resource while the second one is a GpioInt () resource for the same pin. The acpi_gpio_mapping table used by vsc-tp.c maps the first Gpio () resource to "wakeuphost-gpios" whereas the second GpioInt () entry is mapped to "wakeuphostint-gpios". Using "wakeuphost" to request the GPIO as was done until now, means that the gpiolib-acpi code does not know that the GPIO is active-low as that info is only available in the GpioInt () entry. Things were still working before due to the following happening: 1. Since the 2 entries point to the same pin they share a struct gpio_desc 2.
The SPI core creates the SPI device vsc-tp.c binds to and calls acpi_dev_gpio_irq_get(). This does use the second entry and sets FLAG_ACTIVE_LOW in gpio_desc.flags . 3. vsc_tp_probe() requests the "wakeuphost" GPIO and inherits the active-low flag set by acpi_dev_gpio_irq_get() But there is a possible scenario where things do not work: 1. - 3. happen as above 4. After requesting the "wakeuphost" GPIO, the "resetfw" GPIO is requested next, but its USB GPIO controller is not available yet, so this call returns -EPROBE_DEFER. 5. The gpio_desc for "wakeuphost" is put() and during this the active-low flag is cleared from gpio_desc.flags . 6. Later on vsc_tp_probe() requests the "wakeuphost" GPIO again, but now it is not marked active-low. The difference can also be seen in /sys/kernel/debug/gpio, which contains the following line for this GPIO: gpio-535 ( |wakeuphost ) in hi IRQ ACTIVE LOW If the second scenario is hit the "ACTIVE LOW" at the end disappears and things do not work. Fix this by requesting the GPIO through the "wakeuphostint" mapping instead which provides active-low info without relying on acpi_dev_gpio_irq_get() pre-populating this info in the gpio_desc. 
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2316918 Signed-off-by: Hans de Goede Reviewed-by: Stanislaw Gruszka Tested-by: Sakari Ailus Fixes: 566f5ca97680 ("mei: Add transport driver for IVSC device") Cc: stable Link: https://lore.kernel.org/r/20250214212425.84021-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-tp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 35d349fee7698..7be1649b19725 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -502,7 +502,7 @@ static int vsc_tp_probe(struct spi_device *spi) if (ret) return ret; - tp->wakeuphost = devm_gpiod_get(dev, "wakeuphost", GPIOD_IN); + tp->wakeuphost = devm_gpiod_get(dev, "wakeuphostint", GPIOD_IN); if (IS_ERR(tp->wakeuphost)) return PTR_ERR(tp->wakeuphost); From c90aad369899a607cfbc002bebeafd51e31900cd Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 13 Feb 2025 15:22:57 +0300 Subject: [PATCH 042/503] usb: atm: cxacru: fix a flaw in existing endpoint checks Syzbot once again identified a flaw in usb endpoint checking, see [1]. This time the issue stems from a commit authored by me (2eabb655a968 ("usb: atm: cxacru: fix endpoint checking in cxacru_bind()")). While using usb_find_common_endpoints() may usually be enough to discard devices with wrong endpoints, in this case one needs more than just finding and identifying the sufficient number of endpoints of correct types - one needs to check the endpoint's address as well. Since cxacru_bind() fills URBs with CXACRU_EP_CMD address in mind, switch the endpoint verification approach to usb_check_XXX_endpoints() instead to fix incomplete ep testing. [1] Syzbot report: usb 5-1: BOGUS urb xfer, pipe 3 != type 1 WARNING: CPU: 0 PID: 1378 at drivers/usb/core/urb.c:504 usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... RIP: 0010:usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... 
Call Trace: cxacru_cm+0x3c8/0xe50 drivers/usb/atm/cxacru.c:649 cxacru_card_status drivers/usb/atm/cxacru.c:760 [inline] cxacru_bind+0xcf9/0x1150 drivers/usb/atm/cxacru.c:1223 usbatm_usb_probe+0x314/0x1d30 drivers/usb/atm/usbatm.c:1058 cxacru_usb_probe+0x184/0x220 drivers/usb/atm/cxacru.c:1377 usb_probe_interface+0x641/0xbb0 drivers/usb/core/driver.c:396 really_probe+0x2b9/0xad0 drivers/base/dd.c:658 __driver_probe_device+0x1a2/0x390 drivers/base/dd.c:800 driver_probe_device+0x50/0x430 drivers/base/dd.c:830 ... Reported-and-tested-by: syzbot+ccbbc229a024fa3e13b5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ccbbc229a024fa3e13b5 Fixes: 2eabb655a968 ("usb: atm: cxacru: fix endpoint checking in cxacru_bind()") Cc: stable@kernel.org Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20250213122259.730772-1-n.zhandarovich@fintech.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/atm/cxacru.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 0dd85d2635b99..47d06af33747d 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1131,7 +1131,10 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; - struct usb_endpoint_descriptor *in, *out; + static const u8 ep_addrs[] = { + CXACRU_EP_CMD + USB_DIR_IN, + CXACRU_EP_CMD + USB_DIR_OUT, + 0}; int ret; /* instance init */ @@ -1179,13 +1182,11 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, } if (usb_endpoint_xfer_int(&cmd_ep->desc)) - ret = usb_find_common_endpoints(intf->cur_altsetting, - NULL, NULL, &in, &out); + ret = usb_check_int_endpoints(intf, ep_addrs); else - ret = usb_find_common_endpoints(intf->cur_altsetting, - &in, &out, NULL, NULL); + ret = usb_check_bulk_endpoints(intf, ep_addrs); - if (ret) { 
+ if (!ret) { usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n"); ret = -ENODEV; goto fail; From 17c2c87c37862c3e95b55f660681cc6e8d66660e Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Wed, 12 Feb 2025 15:38:40 +0530 Subject: [PATCH 043/503] usb: gadget: u_ether: Set is_suspend flag if remote wakeup fails Currently while UDC suspends, u_ether attempts to remote wakeup the host if there are any pending transfers. However, if remote wakeup fails, the UDC remains suspended but the is_suspend flag is not set. And since is_suspend flag isn't set, the subsequent eth_start_xmit() would queue USB requests to suspended UDC. To fix this, bail out from gether_suspend() only if remote wakeup operation is successful. Cc: stable Fixes: 0a1af6dfa077 ("usb: gadget: f_ecm: Add suspend/resume and remote wakeup support") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250212100840.3812153-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 09e2838917e29..f58590bf5e02f 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1052,8 +1052,8 @@ void gether_suspend(struct gether *link) * There is a transfer in progress. So we trigger a remote * wakeup to inform the host. 
*/ - ether_wakeup_host(dev->port_usb); - return; + if (!ether_wakeup_host(dev->port_usb)) + return; } spin_lock_irqsave(&dev->lock, flags); link->is_suspend = true; From 40e89ff5750fca2c1d6da93f98a2038716bba86c Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 17 Feb 2025 17:33:28 +0530 Subject: [PATCH 044/503] usb: gadget: Set self-powered based on MaxPower and bmAttributes Currently the USB gadget will be set as bus-powered based solely on whether its bMaxPower is greater than 100mA, but this may miss devices that may legitimately draw less than 100mA but still want to report as bus-powered. Similarly during suspend & resume, USB gadget is incorrectly marked as bus/self powered without checking the bmAttributes field. Fix these by configuring the USB gadget as self or bus powered based on bmAttributes, and explicitly set it as bus-powered if it draws more than 100mA. Cc: stable Fixes: 5e5caf4fa8d3 ("usb: gadget: composite: Inform controller driver of self-powered") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250217120328.2446639-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index bdda8c74602de..1fb28bbf6c458 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1050,10 +1050,11 @@ static int set_config(struct usb_composite_dev *cdev, else usb_gadget_set_remote_wakeup(gadget, 0); done: - if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) - usb_gadget_set_selfpowered(gadget); - else + if (power > USB_SELF_POWER_VBUS_MAX_DRAW || + !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2615,7 +2616,9 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 
1; - usb_gadget_set_selfpowered(gadget); + if (cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) + usb_gadget_set_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, 2); } @@ -2649,8 +2652,11 @@ void composite_resume(struct usb_gadget *gadget) else maxpower = min(maxpower, 900U); - if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW) + if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW || + !(cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, maxpower); } else { From d6b82dafd17db0658f089b9cdec573982ca82bc5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 19 Feb 2025 12:47:00 +0100 Subject: [PATCH 045/503] usb: typec: tcpci_rt1711h: Unmask alert interrupts to fix functionality During probe, the TCPC alert interrupts are getting masked to avoid unwanted interrupts during chip setup: this is ok to do but there is no unmasking happening at any later time, which means that the chip will not raise any interrupt, essentially making it not functional as, while internally it does perform all of the intended functions, it won't signal anything to the outside. Unmask the alert interrupts to fix functionality. 
Fixes: ce08eaeb6388 ("staging: typec: rt1711h typec chip driver") Cc: stable Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250219114700.41700-1-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_rt1711h.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 64f6dd0dc6609..88c50b984e8a3 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -334,6 +334,11 @@ static int rt1711h_probe(struct i2c_client *client) { int ret; struct rt1711h_chip *chip; + const u16 alert_mask = TCPC_ALERT_TX_SUCCESS | TCPC_ALERT_TX_DISCARDED | + TCPC_ALERT_TX_FAILED | TCPC_ALERT_RX_HARD_RST | + TCPC_ALERT_RX_STATUS | TCPC_ALERT_POWER_STATUS | + TCPC_ALERT_CC_STATUS | TCPC_ALERT_RX_BUF_OVF | + TCPC_ALERT_FAULT; chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) @@ -382,6 +387,12 @@ static int rt1711h_probe(struct i2c_client *client) dev_name(chip->dev), chip); if (ret < 0) return ret; + + /* Enable alert interrupts */ + ret = rt1711h_write16(chip, TCPC_ALERT_MASK, alert_mask); + if (ret < 0) + return ret; + enable_irq_wake(client->irq); return 0; From 976e7e9bdc7719a023a4ecccd2e3daec9ab20a40 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 17 Feb 2025 13:54:39 +0300 Subject: [PATCH 046/503] acpi: typec: ucsi: Introduce a ->poll_cci method For the ACPI backend of UCSI the UCSI "registers" are just a memory copy of the register values in an opregion. The ACPI implementation in the BIOS ensures that the opregion contents are synced to the embedded controller and it ensures that the registers (in particular CCI) are synced back to the opregion on notifications. 
While there is an ACPI call that syncs the actual registers to the opregion there is rarely a need to do this and on some ACPI implementations it actually breaks in various interesting ways. The only reason to force a sync from the embedded controller is to poll CCI while notifications are disabled. Only the ucsi core knows if this is the case and guessing based on the current command is suboptimal, i.e. leading to the following spurious assertion splat: WARNING: CPU: 3 PID: 76 at drivers/usb/typec/ucsi/ucsi.c:1388 ucsi_reset_ppm+0x1b4/0x1c0 [typec_ucsi] CPU: 3 UID: 0 PID: 76 Comm: kworker/3:0 Not tainted 6.12.11-200.fc41.x86_64 #1 Hardware name: LENOVO 21D0/LNVNB161216, BIOS J6CN45WW 03/17/2023 Workqueue: events_long ucsi_init_work [typec_ucsi] RIP: 0010:ucsi_reset_ppm+0x1b4/0x1c0 [typec_ucsi] Call Trace: ucsi_init_work+0x3c/0xac0 [typec_ucsi] process_one_work+0x179/0x330 worker_thread+0x252/0x390 kthread+0xd2/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Thus introduce a ->poll_cci() method that works like ->read_cci() with an additional forced sync and document that this should be used when polling with notifications disabled. For all other backends that presumably don't have this issue use the same implementation for both methods. Fixes: fa48d7e81624 ("usb: typec: ucsi: Do not call ACPI _DSM method for UCSI read operations") Cc: stable Signed-off-by: Christian A. 
Ehrhardt Tested-by: Fedor Pchelkin Signed-off-by: Fedor Pchelkin Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250217105442.113486-2-boddah8794@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 10 +++++----- drivers/usb/typec/ucsi/ucsi.h | 2 ++ drivers/usb/typec/ucsi/ucsi_acpi.c | 21 ++++++++++++++------- drivers/usb/typec/ucsi/ucsi_ccg.c | 1 + drivers/usb/typec/ucsi/ucsi_glink.c | 1 + drivers/usb/typec/ucsi/ucsi_stm32g0.c | 1 + drivers/usb/typec/ucsi/ucsi_yoga_c630.c | 1 + 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index fcf499cc9458c..0fe1476f4c297 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1346,7 +1346,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) mutex_lock(&ucsi->ppm_lock); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; @@ -1364,7 +1364,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); do { - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; if (cci & UCSI_CCI_COMMAND_COMPLETE) @@ -1393,7 +1393,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) /* Give the PPM time to process a reset before reading CCI */ msleep(20); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret) goto out; @@ -1929,8 +1929,8 @@ struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops) struct ucsi *ucsi; if (!ops || - !ops->read_version || !ops->read_cci || !ops->read_message_in || - !ops->sync_control || !ops->async_control) + !ops->read_version || !ops->read_cci || !ops->poll_cci || + !ops->read_message_in || !ops->sync_control || !ops->async_control) return ERR_PTR(-EINVAL); ucsi = kzalloc(sizeof(*ucsi), GFP_KERNEL); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 
82735eb34f0e3..28780acc4af2e 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -62,6 +62,7 @@ struct dentry; * struct ucsi_operations - UCSI I/O operations * @read_version: Read implemented UCSI version * @read_cci: Read CCI register + * @poll_cci: Read CCI register while polling with notifications disabled * @read_message_in: Read message data from UCSI * @sync_control: Blocking control operation * @async_control: Non-blocking control operation @@ -76,6 +77,7 @@ struct dentry; struct ucsi_operations { int (*read_version)(struct ucsi *ucsi, u16 *version); int (*read_cci)(struct ucsi *ucsi, u32 *cci); + int (*poll_cci)(struct ucsi *ucsi, u32 *cci); int (*read_message_in)(struct ucsi *ucsi, void *val, size_t val_len); int (*sync_control)(struct ucsi *ucsi, u64 command); int (*async_control)(struct ucsi *ucsi, u64 command); diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 5c55155519634..ac1ebb5d95272 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -59,19 +59,24 @@ static int ucsi_acpi_read_version(struct ucsi *ucsi, u16 *version) static int ucsi_acpi_read_cci(struct ucsi *ucsi, u32 *cci) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - int ret; - - if (UCSI_COMMAND(ua->cmd) == UCSI_PPM_RESET) { - ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); - if (ret) - return ret; - } memcpy(cci, ua->base + UCSI_CCI, sizeof(*cci)); return 0; } +static int ucsi_acpi_poll_cci(struct ucsi *ucsi, u32 *cci) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); + if (ret) + return ret; + + return ucsi_acpi_read_cci(ucsi, cci); +} + static int ucsi_acpi_read_message_in(struct ucsi *ucsi, void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); @@ -94,6 +99,7 @@ static int ucsi_acpi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_acpi_ops = { .read_version = 
ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_acpi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_acpi_async_control @@ -142,6 +148,7 @@ static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_gram_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_gram_read_message_in, .sync_control = ucsi_gram_sync_control, .async_control = ucsi_acpi_async_control diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 740171f24ef9f..4b1668733a4be 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -664,6 +664,7 @@ static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_ccg_ops = { .read_version = ucsi_ccg_read_version, .read_cci = ucsi_ccg_read_cci, + .poll_cci = ucsi_ccg_read_cci, .read_message_in = ucsi_ccg_read_message_in, .sync_control = ucsi_ccg_sync_control, .async_control = ucsi_ccg_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index fed39d4580905..8af79101a2fc7 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -206,6 +206,7 @@ static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) static const struct ucsi_operations pmic_glink_ucsi_ops = { .read_version = pmic_glink_ucsi_read_version, .read_cci = pmic_glink_ucsi_read_cci, + .poll_cci = pmic_glink_ucsi_read_cci, .read_message_in = pmic_glink_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = pmic_glink_ucsi_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index 6923fad31d795..57ef7d83a4121 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ 
b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -424,6 +424,7 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data) static const struct ucsi_operations ucsi_stm32g0_ops = { .read_version = ucsi_stm32g0_read_version, .read_cci = ucsi_stm32g0_read_cci, + .poll_cci = ucsi_stm32g0_read_cci, .read_message_in = ucsi_stm32g0_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_stm32g0_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c index 4cae85c0dc12a..d33e3f2dd1d80 100644 --- a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c +++ b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c @@ -74,6 +74,7 @@ static int yoga_c630_ucsi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations yoga_c630_ucsi_ops = { .read_version = yoga_c630_ucsi_read_version, .read_cci = yoga_c630_ucsi_read_cci, + .poll_cci = yoga_c630_ucsi_read_cci, .read_message_in = yoga_c630_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = yoga_c630_ucsi_async_control, From bf4f9ae1cb08ccaafbe6874be6c46f59b83ae778 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 17 Feb 2025 13:54:40 +0300 Subject: [PATCH 047/503] usb: typec: ucsi: increase timeout for PPM reset operations It is observed that on some systems an initial PPM reset during the boot phase can trigger a timeout: [ 6.482546] ucsi_acpi USBC000:00: failed to reset PPM! [ 6.482551] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed Still, increasing the timeout value, albeit being the most straightforward solution, eliminates the problem: the initial PPM reset may take up to ~8000-10000ms on some Lenovo laptops. When it is reset after the above period of time (or even if ucsi_reset_ppm() is not called overall), UCSI works as expected. Moreover, if the ucsi_acpi module is loaded/unloaded manually after the system has booted, reading the CCI values and resetting the PPM works perfectly, without any timeout. 
Thus it's only a boot-time issue. The reason for this behavior is not clear but it may be the consequence of some tricks that the firmware performs or be an actual firmware bug. As a workaround, increase the timeout to avoid failing the UCSI initialization prematurely. Fixes: b1b59e16075f ("usb: typec: ucsi: Increase command completion timeout value") Cc: stable Signed-off-by: Fedor Pchelkin Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250217105442.113486-3-boddah8794@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0fe1476f4c297..7a56d3f840d75 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -25,7 +25,7 @@ * difficult to estimate the time it takes for the system to process the command * before it is actually passed to the PPM. */ -#define UCSI_TIMEOUT_MS 5000 +#define UCSI_TIMEOUT_MS 10000 /* * UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests From a321d163de3d8aa38a6449ab2becf4b1581aed96 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 8 Jan 2025 19:09:27 +0530 Subject: [PATCH 048/503] bus: mhi: host: pci_generic: Use pci_try_reset_function() to avoid deadlock There are multiple places from where the recovery work gets scheduled asynchronously. Also, there are multiple places where the caller waits synchronously for the recovery to be completed. One such place is during the PM shutdown() callback. If the device is not alive during recovery_work, it will try to reset the device using pci_reset_function(). This function internally will take the device_lock() first before resetting the device. By this time, if the lock has already been acquired, then recovery_work will get stalled while waiting for the lock. And if the lock was already acquired by the caller which waits for the recovery_work to be completed, it will lead to deadlock. 
This is what happened on the X1E80100 CRD device when the device died before shutdown() callback. Driver core calls the driver's shutdown() callback while holding the device_lock() leading to deadlock. And this deadlock scenario can occur on other paths as well, like during the PM suspend() callback, where the driver core would hold the device_lock() before calling driver's suspend() callback. And if the recovery_work was already started, it could lead to deadlock. This is also observed on the X1E80100 CRD. So to fix both issues, use pci_try_reset_function() in recovery_work. This function first checks for the availability of the device_lock() before trying to reset the device. If the lock is available, it will acquire it and reset the device. Otherwise, it will return -EAGAIN. If that happens, recovery_work will fail with the error message "Recovery failed" as not much could be done. Cc: stable@vger.kernel.org # 5.12 Reported-by: Johan Hovold Closes: https://lore.kernel.org/mhi/Z1me8iaK7cwgjL92@hovoldconsulting.com Fixes: 7389337f0a78 ("mhi: pci_generic: Add suspend/resume/recovery procedure") Reviewed-by: Johan Hovold Tested-by: Johan Hovold Analyzed-by: Johan Hovold Link: https://lore.kernel.org/mhi/Z2KKjWY2mPen6GPL@hovoldconsulting.com/ Reviewed-by: Loic Poulain Link: https://lore.kernel.org/r/20250108-mhi_recovery_fix-v1-1-a0a00a17da46@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index c41119b9079f0..7ffea0f981628 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1095,8 +1095,9 @@ static void mhi_pci_recovery_work(struct work_struct *work) err_unprepare: mhi_unprepare_after_power_down(mhi_cntrl); err_try_reset: - if (pci_reset_function(pdev)) - dev_err(&pdev->dev, "Recovery failed\n"); + err = pci_try_reset_function(pdev); + if (err) + 
dev_err(&pdev->dev, "Recovery failed: %d\n", err); } static void health_check(struct timer_list *t) From fa2e55811ae25020a5e9b23a8932e67e6d6261a4 Mon Sep 17 00:00:00 2001 From: Mike Lothian Date: Fri, 14 Feb 2025 12:28:00 +0000 Subject: [PATCH 049/503] ntsync: Set the permissions to be 0666 This allows ntsync to be usable by non-root processes out of the box. Signed-off-by: Mike Lothian Reviewed-by: Elizabeth Figura Link: https://lore.kernel.org/r/20250214122759.2629-2-mike@fireburn.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 055395cde42b6..0b4e56d59b3da 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -1208,6 +1208,7 @@ static struct miscdevice ntsync_misc = { .minor = MISC_DYNAMIC_MINOR, .name = NTSYNC_NAME, .fops = &ntsync_fops, + .mode = 0666, }; module_misc_device(ntsync_misc); From 954b8915ff86353037d4246c7129d807a75f898b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 13 Feb 2025 16:57:46 -0600 Subject: [PATCH 050/503] MAINTAINERS: change maintainer for FSI Due to job transitions, both Joel and Jeremy can no longer maintain the FSI subsystem. I will take over. I also replaced Alistair with Ninad as a reviewer, as Alistair doesn't have access to hardware and hasn't been active. I also removed the link to Joel's FSI tree as he won't be maintaining it. 
Signed-off-by: Eddie James Acked-by: Jeremy Kerr Link: https://lore.kernel.org/r/20250213225746.2159118-1-eajames@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..f6026443526b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9439,14 +9439,11 @@ F: include/linux/fscrypt.h F: include/uapi/linux/fscrypt.h FSI SUBSYSTEM -M: Jeremy Kerr -M: Joel Stanley -R: Alistar Popple -R: Eddie James +M: Eddie James +R: Ninad Palsule L: linux-fsi@lists.ozlabs.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-fsi/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git F: drivers/fsi/ F: include/linux/fsi*.h F: include/trace/events/fsi*.h From 4738d3d3e12d70a5067baba147daf57e57b77548 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Feb 2025 20:50:13 +0200 Subject: [PATCH 051/503] intel_th: msu: Fix kernel-doc warnings Correct function comments to prevent kernel-doc warnings found when using "W=1". 
msu.c:162: warning: Function parameter or struct member 'mbuf_priv' not described in 'msc' msu.c:164: warning: Function parameter or struct member 'orig_addr' not described in 'msc' msu.c:164: warning: Function parameter or struct member 'orig_sz' not described in 'msc' Signed-off-by: Andy Shevchenko Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20250211185017.1759193-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 66123d684ac9e..492d8eba37ebc 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -108,7 +108,7 @@ struct msc_iter { * @reg_base: register window base address * @thdev: intel_th_device pointer * @mbuf: MSU buffer, if assigned - * @mbuf_priv MSU buffer's private data, if @mbuf + * @mbuf_priv: MSU buffer's private data, if @mbuf * @win_list: list of windows in multiblock mode * @single_sgt: single mode buffer * @cur_win: current window @@ -117,6 +117,8 @@ struct msc_iter { * @single_wrap: single mode wrap occurred * @base: buffer's base pointer * @base_addr: buffer's base address + * @orig_addr: MSC0 buffer's base address + * @orig_sz: MSC0 buffer's size * @user_count: number of users of the buffer * @mmap_count: number of mappings * @buf_mutex: mutex to serialize access to buffer-related bits From 04190ec6d02aa8fee0f03189bb7762b44739c253 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:14 +0200 Subject: [PATCH 052/503] intel_th: msu: Fix less trivial kernel-doc warnings Correct function comments to prevent kernel-doc warnings found when using "W=1" that the drive-by fixers had trouble documenting and skipped over. 
msu.c:168: warning: Function parameter or struct member 'msu_base' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'work' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'switch_on_unlock' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'iter_list' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'stop_on_full' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'do_irq' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'multi_is_broken' not described in 'msc' Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250211185017.1759193-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 492d8eba37ebc..bf99d79a41920 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -105,13 +105,16 @@ struct msc_iter { /** * struct msc - MSC device representation - * @reg_base: register window base address + * @reg_base: register window base address for the entire MSU + * @msu_base: register window base address for this MSC * @thdev: intel_th_device pointer * @mbuf: MSU buffer, if assigned * @mbuf_priv: MSU buffer's private data, if @mbuf + * @work: a work to stop the trace when the buffer is full * @win_list: list of windows in multiblock mode * @single_sgt: single mode buffer * @cur_win: current window + * @switch_on_unlock: window to switch to when it becomes available * @nr_pages: total number of pages allocated for this buffer * @single_sz: amount of data in single mode * @single_wrap: single mode wrap occurred @@ -122,8 +125,12 @@ struct msc_iter { * @user_count: number of users of the buffer * @mmap_count: number of 
mappings * @buf_mutex: mutex to serialize access to buffer-related bits + * @iter_list: list of open file descriptor iterators + * @stop_on_full: stop the trace if the current window is full * @enabled: MSC is enabled * @wrap: wrapping is enabled + * @do_irq: IRQ resource is available, handle interrupts + * @multi_is_broken: multiblock mode enabled (not disabled by PCI drvdata) * @mode: MSC operating mode * @burst_len: write burst length * @index: number of this MSC in the MSU From b5edccae9f447a92d475267d94c33f4926963eec Mon Sep 17 00:00:00 2001 From: Pawel Chmielewski Date: Tue, 11 Feb 2025 20:50:15 +0200 Subject: [PATCH 053/503] intel_th: pci: Add Arrow Lake support Add support for the Trace Hub in Arrow Lake. Signed-off-by: Pawel Chmielewski Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index e9d8d28e055f3..3e03ee788bb94 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -334,6 +334,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Arrow Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), From a70034d6c0d5f3cdee40bb00a578e17fd2ebe426 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:16 +0200 Subject: [PATCH 054/503] intel_th: pci: Add Panther Lake-H support Add support for the Trace Hub in Panther Lake-H. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 3e03ee788bb94..004e68286fd43 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -339,6 +339,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Panther Lake-H */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), From 49114ff05770264ae233f50023fc64a719a9dcf9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:17 +0200 Subject: [PATCH 055/503] intel_th: pci: Add Panther Lake-P/U support Add support for the Trace Hub in Panther Lake-P/U. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-6-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 004e68286fd43..e3def163d5cf7 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -344,6 +344,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Panther Lake-P/U */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe424), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), From 78eb41f518f414378643ab022241df2a9dcd008b Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 13 Feb 2025 15:05:13 +0100 Subject: [PATCH 056/503] drivers: core: fix device leak in __fw_devlink_relax_cycles() Commit bac3b10b78e5 ("driver core: fw_devlink: Stop trying to optimize cycle detection logic") introduced a new struct device *con_dev and a get_dev_from_fwnode() call to get it, but without adding a corresponding put_device(). 
Closes: https://lore.kernel.org/all/20241204124826.2e055091@booty/ Fixes: bac3b10b78e5 ("driver core: fw_devlink: Stop trying to optimize cycle detection logic") Cc: stable@vger.kernel.org Reviewed-by: Saravana Kannan Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20250213-fix__fw_devlink_relax_cycles_missing_device_put-v2-1-8cd3b03e6a3f@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5a1f051981149..2fde698430dff 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2079,6 +2079,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); + put_device(con_dev); put_device(par_dev); return ret; } From c783e1258f29c5caac9eea0aea6b172870f1baf8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 20 Feb 2025 13:03:14 +0100 Subject: [PATCH 057/503] usb: gadget: Fix setting self-powered state on suspend cdev->config might be NULL, so check it before dereferencing. 
CC: stable Fixes: 40e89ff5750f ("usb: gadget: Set self-powered based on MaxPower and bmAttributes") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250220120314.3614330-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 1fb28bbf6c458..4bcf73bae7610 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2616,7 +2616,8 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; - if (cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) + if (cdev->config && + cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, 2); From 91d44c1afc61a2fec37a9c7a3485368309391e0b Mon Sep 17 00:00:00 2001 From: Qiu-ji Chen Date: Sat, 18 Jan 2025 15:08:33 +0800 Subject: [PATCH 058/503] cdx: Fix possible UAF error in driver_override_show() Fixed a possible UAF problem in driver_override_show() in drivers/cdx/cdx.c This function driver_override_show() is part of DEVICE_ATTR_RW, which includes both driver_override_show() and driver_override_store(). These functions can be executed concurrently in sysfs. The driver_override_store() function uses driver_set_override() to update the driver_override value, and driver_set_override() internally locks the device (device_lock(dev)). If driver_override_show() reads cdx_dev->driver_override without locking, it could potentially access a freed pointer if driver_override_store() frees the string concurrently. This could lead to printing a kernel address, which is a security risk since DEVICE_ATTR can be read by all users. Additionally, a similar pattern is used in drivers/amba/bus.c, as well as many other bus drivers, where device_lock() is taken in the show function, and it has been working without issues. 
This potential bug was detected by our experimental static analysis tool, which analyzes locking APIs and paired functions to identify data races and atomicity violations. Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus") Cc: stable Signed-off-by: Qiu-ji Chen Link: https://lore.kernel.org/r/20250118070833.27201-1-chenqiuji666@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/cdx/cdx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c index c573ed2ee71a8..7811aa7340537 100644 --- a/drivers/cdx/cdx.c +++ b/drivers/cdx/cdx.c @@ -473,8 +473,12 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cdx_device *cdx_dev = to_cdx_device(dev); + ssize_t len; - return sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_lock(dev); + len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_unlock(dev); + return len; } static DEVICE_ATTR_RW(driver_override); From c99e1e1d0850ff157f1bc16871acd2dff5a9bcc3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:54:33 +0100 Subject: [PATCH 059/503] vbox: add HAS_IOPORT dependency The vboxguest driver depends on port I/O for debug output: include/asm-generic/io.h:626:15: error: call to '_outl' declared with attribute error: outl() requires CONFIG_HAS_IOPORT 626 | #define _outl _outl include/asm-generic/io.h:663:14: note: in expansion of macro '_outl' 663 | #define outl _outl | ^~~~~ drivers/virt/vboxguest/vboxguest_utils.c:102:9: note: in expansion of macro 'outl' 102 | outl(phys_req, gdev->io_port + VMMDEV_PORT_OFF_REQUEST); | ^~~~ Most arm64 platforms don't actually support port I/O, though it is currently enabled unconditionally. Refine the vbox dependency to allow turning HAS_IOPORT off in the future when building for platforms without port I/O and allow compile-testing on all architectures. 
Fixes: 5cf8f938bf5c ("vbox: Enable VBOXGUEST and VBOXSF_FS on ARM64") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250122065445.1469218-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virt/vboxguest/Kconfig b/drivers/virt/vboxguest/Kconfig index 11b153e7454e4..eaba28c95e733 100644 --- a/drivers/virt/vboxguest/Kconfig +++ b/drivers/virt/vboxguest/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config VBOXGUEST tristate "Virtual Box Guest integration support" - depends on (ARM64 || X86) && PCI && INPUT + depends on (ARM64 || X86 || COMPILE_TEST) && PCI && INPUT + depends on HAS_IOPORT help This is a driver for the Virtual Box Guest PCI device used in Virtual Box virtual machines. Enabling this driver will add From dcb0d43ba8eb9517e70b1a0e4b0ae0ab657a0e5a Mon Sep 17 00:00:00 2001 From: Visweswara Tanuku Date: Fri, 24 Jan 2025 04:57:40 -0800 Subject: [PATCH 060/503] slimbus: messaging: Free transaction ID in delayed interrupt scenario If the interrupt is delayed for any reason, slim_do_transfer() returns a timeout error but the transaction ID (TID) is not freed. This results in an invalid memory access inside qcom_slim_ngd_rx_msgq_cb() due to the invalid TID. Fix the issue by freeing the TID in slim_do_transfer() before returning the timeout error, to avoid the invalid memory access. Call trace: __memcpy_fromio+0x20/0x190 qcom_slim_ngd_rx_msgq_cb+0x130/0x290 [slim_qcom_ngd_ctrl] vchan_complete+0x2a0/0x4a0 tasklet_action_common+0x274/0x700 tasklet_action+0x28/0x3c _stext+0x188/0x620 run_ksoftirqd+0x34/0x74 smpboot_thread_fn+0x1d8/0x464 kthread+0x178/0x238 ret_from_fork+0x10/0x20 Code: aa0003e8 91000429 f100044a 3940002b (3800150b) ---[ end trace 0fe00bec2b975c99 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt.
Fixes: afbdcc7c384b ("slimbus: Add messaging APIs to slimbus framework") Cc: stable Signed-off-by: Visweswara Tanuku Link: https://lore.kernel.org/r/20250124125740.16897-1-quic_vtanuku@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/messaging.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index e7aa9bd4b44b8..6f01d944f9c65 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -148,8 +148,9 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) } ret = ctrl->xfer_msg(ctrl, txn); - - if (!ret && need_tid && !txn->msg->comp) { + if (ret == -ETIMEDOUT) { + slim_free_txn_tid(ctrl, txn); + } else if (!ret && need_tid && !txn->msg->comp) { unsigned long ms = txn->rl + HZ; time_left = wait_for_completion_timeout(txn->comp, From e77aff5528a183462714f750e45add6cc71e276a Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 30 Jan 2025 21:58:22 +0000 Subject: [PATCH 061/503] binderfs: fix use-after-free in binder_devices Devices created through binderfs are added to the global binder_devices list but are not removed before being destroyed. This leads to dangling pointers in the list and subsequent use-after-free errors: ================================================================== BUG: KASAN: slab-use-after-free in binder_add_device+0x5c/0x9c Write of size 8 at addr ffff0000c258d708 by task mount/653 CPU: 7 UID: 0 PID: 653 Comm: mount Not tainted 6.13.0-09030-g6d61a53dd6f5 #1 Hardware name: linux,dummy-virt (DT) Call trace: binder_add_device+0x5c/0x9c binderfs_binder_device_create+0x690/0x84c [...] __arm64_sys_mount+0x324/0x3bc Allocated by task 632: binderfs_binder_device_create+0x168/0x84c binder_ctl_ioctl+0xfc/0x184 [...] __arm64_sys_ioctl+0x110/0x150 Freed by task 649: kfree+0xe0/0x338 binderfs_evict_inode+0x138/0x1dc [...] 
================================================================== Remove devices from binder_devices before destroying them. Cc: Li Li Reported-by: syzbot+7015dcf45953112c8b45@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7015dcf45953112c8b45 Fixes: 12d909cac1e1 ("binderfs: add new binder devices to binder_devices") Signed-off-by: Carlos Llamas Tested-by: syzbot+7015dcf45953112c8b45@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20250130215823.1518990-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index bc6bae76ccaf1..94c6446604fc9 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -274,6 +274,7 @@ static void binderfs_evict_inode(struct inode *inode) mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { + hlist_del_init(&device->hlist); kfree(device->context.name); kfree(device); } From 819cec1dc47cdeac8f5dd6ba81c1dbee2a68c3bb Mon Sep 17 00:00:00 2001 From: Haoyu Li Date: Thu, 30 Jan 2025 19:58:11 +0800 Subject: [PATCH 062/503] drivers: virt: acrn: hsm: Use kzalloc to avoid info leak in pmcmd_ioctl In the "pmcmd_ioctl" function, three memory objects allocated by kmalloc are initialized by "hcall_get_cpu_state", which are then copied to user space. The initializer is indeed implemented in "acrn_hypercall2" (arch/x86/include/asm/acrn.h). There is a risk of information leakage due to uninitialized bytes. 
Fixes: 3d679d5aec64 ("virt: acrn: Introduce interfaces to query C-states and P-states allowed by hypervisor") Signed-off-by: Haoyu Li Cc: stable Acked-by: Fei Li Link: https://lore.kernel.org/r/20250130115811.92424-1-lihaoyu499@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/hsm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index c24036c4e51ec..e4e196abdaac9 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -49,7 +49,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) switch (cmd & PMCMD_TYPE_MASK) { case ACRN_PMCMD_GET_PX_CNT: case ACRN_PMCMD_GET_CX_CNT: - pm_info = kmalloc(sizeof(u64), GFP_KERNEL); + pm_info = kzalloc(sizeof(u64), GFP_KERNEL); if (!pm_info) return -ENOMEM; @@ -64,7 +64,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(pm_info); break; case ACRN_PMCMD_GET_PX_DATA: - px_data = kmalloc(sizeof(*px_data), GFP_KERNEL); + px_data = kzalloc(sizeof(*px_data), GFP_KERNEL); if (!px_data) return -ENOMEM; @@ -79,7 +79,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(px_data); break; case ACRN_PMCMD_GET_CX_DATA: - cx_data = kmalloc(sizeof(*cx_data), GFP_KERNEL); + cx_data = kzalloc(sizeof(*cx_data), GFP_KERNEL); if (!cx_data) return -ENOMEM; From 038ef0754aae76f79b147b8867f9250e6a976872 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 7 Feb 2025 00:03:11 +0200 Subject: [PATCH 063/503] eeprom: digsy_mtc: Make GPIO lookup table match the device The dev_id value in the GPIO lookup table must match the device instance name, which in this case is a combination of the name and the platform device ID, i.e. "spi_gpio.1". But the table assumed that there was no platform device ID defined, which is wrong. Fix the dev_id value accordingly.
Fixes: 9b00bc7b901f ("spi: spi-gpio: Rewrite to use GPIO descriptors") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250206220311.1554075-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/digsy_mtc_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c index 88888485e6f8e..ee58f7ce5bfa9 100644 --- a/drivers/misc/eeprom/digsy_mtc_eeprom.c +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -50,7 +50,7 @@ static struct platform_device digsy_mtc_eeprom = { }; static struct gpiod_lookup_table eeprom_spi_gpiod_table = { - .dev_id = "spi_gpio", + .dev_id = "spi_gpio.1", .table = { GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK, "sck", GPIO_ACTIVE_HIGH), From 6d991f569c5ef6eaeadf1238df2c36e3975233ad Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 23 Jan 2025 09:32:49 -0300 Subject: [PATCH 064/503] char: misc: deallocate static minor in error path When creating sysfs files fails, the allocated minor must be freed so that it can be reused later. Not freeing it is especially harmful for static minor numbers, since those would always fail to register later on.
Fixes: 6d04d2b554b1 ("misc: misc_minor_alloc to use ida for all dynamic/misc dynamic minors") Cc: stable Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20250123123249.4081674-5-cascardo@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 2cf595d2e10b8..f7dd455dd0dd3 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -264,8 +264,8 @@ int misc_register(struct miscdevice *misc) device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { + misc_minor_free(misc->minor); if (is_dynamic) { - misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); From 32ce5d87d52213a50a513750f01a56f4d01f50cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Feb 2025 11:21:30 +0100 Subject: [PATCH 065/503] bus: simple-pm-bus: fix forced runtime PM use The simple-pm-bus driver only enables runtime PM for some buses ('simple-pm-bus') yet has started calling pm_runtime_force_suspend() and pm_runtime_force_resume() during system suspend unconditionally. This currently works, but that is not obvious and depends on implementation details which may change at some point. Add dedicated system sleep ops and only call pm_runtime_force_suspend() and pm_runtime_force_resume() for buses that use runtime PM to avoid any future surprises. Fixes: c45839309c3d ("drivers: bus: simple-pm-bus: Use clocks") Cc: Liu Ying Signed-off-by: Johan Hovold Reviewed-by: Ulf Hansson Acked-by: Liu Ying Acked-by: Rafael J. 
Wysocki Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250214102130.3000-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/simple-pm-bus.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 5dea31769f9a8..d8e029e7e53f7 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -109,9 +109,29 @@ static int simple_pm_bus_runtime_resume(struct device *dev) return 0; } +static int simple_pm_bus_suspend(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_suspend(dev); +} + +static int simple_pm_bus_resume(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_resume(dev); +} + static const struct dev_pm_ops simple_pm_bus_pm_ops = { RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend, simple_pm_bus_runtime_resume, NULL) - NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(simple_pm_bus_suspend, simple_pm_bus_resume) }; #define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. */ From 92527e473911b835c2c18b0c55c337c33e85ff00 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Thu, 20 Feb 2025 13:23:34 -0600 Subject: [PATCH 066/503] ntsync: Check wait count based on byte size. GCC versions below 13 incorrectly detect the copy size as being static and too small to fit in the "fds" array. Work around this by explicitly calculating the size and returning EINVAL based on that, instead of based on the object count. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502072019.LYoCR9bF-lkp@intel.com/ Suggested-by: Arnd Bergmann Signed-off-by: Elizabeth Figura -- Suggested-by as per Arnd's request, but the only thing I changed was preserving array_size() [as noted by Geert in the linked thread]. I tested and found no regressions. v2: Add missing sign-off Link: https://lore.kernel.org/r/20250220192334.549167-1-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 0b4e56d59b3da..999026a1ae048 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -873,6 +873,7 @@ static int setup_wait(struct ntsync_device *dev, { int fds[NTSYNC_MAX_WAIT_COUNT + 1]; const __u32 count = args->count; + size_t size = array_size(count, sizeof(fds[0])); struct ntsync_q *q; __u32 total_count; __u32 i, j; @@ -880,15 +881,14 @@ static int setup_wait(struct ntsync_device *dev, if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME)) return -EINVAL; - if (args->count > NTSYNC_MAX_WAIT_COUNT) + if (size >= sizeof(fds)) return -EINVAL; total_count = count; if (args->alert) total_count++; - if (copy_from_user(fds, u64_to_user_ptr(args->objs), - array_size(count, sizeof(*fds)))) + if (copy_from_user(fds, u64_to_user_ptr(args->objs), size)) return -EFAULT; if (args->alert) fds[count] = args->alert; From 7241c886a71797cc51efc6fadec7076fcf6435c2 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Feb 2025 15:52:52 -0800 Subject: [PATCH 067/503] fbdev: hyperv_fb: iounmap() the correct memory when removing a device When a Hyper-V framebuffer device is removed, or the driver is unbound from a device, any allocated and/or mapped memory must be released. In particular, MMIO address space that was mapped to the framebuffer must be unmapped. 
Current code unmaps the wrong address, resulting in an error like: [ 4093.980597] iounmap: bad address 00000000c936c05c followed by a stack dump. Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") changed the kind of address stored in info->screen_base, and the iounmap() call in hvfb_putmem() was not updated accordingly. Fix this by updating hvfb_putmem() to unmap the correct address. Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 7fdb5edd7e2e8..363e4ccfcdb77 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1080,7 +1080,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) if (par->need_docopy) { vfree(par->dio_vp); - iounmap(info->screen_base); + iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { hvfb_release_phymem(hdev, info->fix.smem_start, From 50cef76d5cb0e199cda19f026842560f6eedc4f7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 14:44:53 +0100 Subject: [PATCH 068/503] x86/microcode/AMD: Load only SHA256-checksummed patches Load patches for which the driver carries a SHA256 checksum of the patch blob. This can be disabled by adding "microcode.amd_sha_check=off" on the kernel cmdline. But it is highly NOT recommended. 
Signed-off-by: Borislav Petkov (AMD) --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/microcode/amd.c | 111 +++++- arch/x86/kernel/cpu/microcode/amd_shas.c | 444 +++++++++++++++++++++++ 3 files changed, 554 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/cpu/microcode/amd_shas.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118d..0e27ebd7e36a9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1341,6 +1341,7 @@ config X86_REBOOTFIXUPS config MICROCODE def_bool y depends on CPU_SUP_AMD || CPU_SUP_INTEL + select CRYPTO_LIB_SHA256 if CPU_SUP_AMD config MICROCODE_INITRD32 def_bool y diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 31f90e129b083..95ac1c6a84fbe 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -23,14 +23,18 @@ #include #include +#include #include #include #include #include #include +#include + #include #include +#include #include #include #include @@ -145,6 +149,98 @@ ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; */ static u32 bsp_cpuid_1_eax __ro_after_init; +static bool sha_check = true; + +struct patch_digest { + u32 patch_id; + u8 sha256[SHA256_DIGEST_SIZE]; +}; + +#include "amd_shas.c" + +static int cmp_id(const void *key, const void *elem) +{ + struct patch_digest *pd = (struct patch_digest *)elem; + u32 patch_id = *(u32 *)key; + + if (patch_id == pd->patch_id) + return 0; + else if (patch_id < pd->patch_id) + return -1; + else + return 1; +} + +static bool need_sha_check(u32 cur_rev) +{ + switch (cur_rev >> 8) { + case 0x80012: return cur_rev <= 0x800126f; break; + case 0x83010: return cur_rev <= 0x830107c; break; + case 0x86001: return cur_rev <= 0x860010e; break; + case 0x86081: return cur_rev <= 0x8608108; break; + case 0x87010: return cur_rev <= 0x8701034; break; + case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0011: return cur_rev <= 0xa0011da; break; + case 0xa0012: return 
cur_rev <= 0xa001243; break; + case 0xa1011: return cur_rev <= 0xa101153; break; + case 0xa1012: return cur_rev <= 0xa10124e; break; + case 0xa1081: return cur_rev <= 0xa108109; break; + case 0xa2010: return cur_rev <= 0xa20102f; break; + case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa6012: return cur_rev <= 0xa60120a; break; + case 0xa7041: return cur_rev <= 0xa704109; break; + case 0xa7052: return cur_rev <= 0xa705208; break; + case 0xa7080: return cur_rev <= 0xa708009; break; + case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa002: return cur_rev <= 0xaa00218; break; + default: break; + } + + pr_info("You should not be seeing this. Please send the following couple of lines to x86--kernel.org\n"); + pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); + return true; +} + +static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) +{ + struct patch_digest *pd = NULL; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state s; + int i; + + if (x86_family(bsp_cpuid_1_eax) < 0x17 || + x86_family(bsp_cpuid_1_eax) > 0x19) + return true; + + if (!need_sha_check(cur_rev)) + return true; + + if (!sha_check) + return true; + + pd = bsearch(&patch_id, phashes, ARRAY_SIZE(phashes), sizeof(struct patch_digest), cmp_id); + if (!pd) { + pr_err("No sha256 digest for patch ID: 0x%x found\n", patch_id); + return false; + } + + sha256_init(&s); + sha256_update(&s, data, len); + sha256_final(&s, digest); + + if (memcmp(digest, pd->sha256, sizeof(digest))) { + pr_err("Patch 0x%x SHA256 digest mismatch!\n", patch_id); + + for (i = 0; i < SHA256_DIGEST_SIZE; i++) + pr_cont("0x%x ", digest[i]); + pr_info("\n"); + + return false; + } + + return true; +} + static u32 get_patch_level(void) { u32 rev, dummy __always_unused; @@ -497,6 +593,9 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, { unsigned long p_addr = (unsigned long)&mc->hdr.data_code; + if 
(!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize)) + return -1; + native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); if (x86_family(bsp_cpuid_1_eax) == 0x17) { @@ -571,8 +670,17 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ struct cont_desc desc = { }; struct microcode_amd *mc; struct cpio_data cp = { }; + char buf[4]; u32 rev; + if (cmdline_find_option(boot_command_line, "microcode.amd_sha_check", buf, 4)) { + if (!strncmp(buf, "off", 3)) { + sha_check = false; + pr_warn_once("It is a very very bad idea to disable the blobs SHA check!\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } + } + bsp_cpuid_1_eax = cpuid_1_eax; rev = get_patch_level(); @@ -902,8 +1010,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, } /* Scan the blob in @data and add microcode patches to the cache. */ -static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, - size_t size) +static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, size_t size) { u8 *fw = (u8 *)data; size_t offset; diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c new file mode 100644 index 0000000000000..2a1655b1fdd88 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -0,0 +1,444 @@ +/* Keep 'em sorted. 
*/ +static const struct patch_digest phashes[] = { + { 0x8001227, { + 0x99,0xc0,0x9b,0x2b,0xcc,0x9f,0x52,0x1b, + 0x1a,0x5f,0x1d,0x83,0xa1,0x6c,0xc4,0x46, + 0xe2,0x6c,0xda,0x73,0xfb,0x2d,0x23,0xa8, + 0x77,0xdc,0x15,0x31,0x33,0x4a,0x46,0x18, + } + }, + { 0x8001250, { + 0xc0,0x0b,0x6b,0x19,0xfd,0x5c,0x39,0x60, + 0xd5,0xc3,0x57,0x46,0x54,0xe4,0xd1,0xaa, + 0xa8,0xf7,0x1f,0xa8,0x6a,0x60,0x3e,0xe3, + 0x27,0x39,0x8e,0x53,0x30,0xf8,0x49,0x19, + } + }, + { 0x800126e, { + 0xf3,0x8b,0x2b,0xb6,0x34,0xe3,0xc8,0x2c, + 0xef,0xec,0x63,0x6d,0xc8,0x76,0x77,0xb3, + 0x25,0x5a,0xb7,0x52,0x8c,0x83,0x26,0xe6, + 0x4c,0xbe,0xbf,0xe9,0x7d,0x22,0x6a,0x43, + } + }, + { 0x800126f, { + 0x2b,0x5a,0xf2,0x9c,0xdd,0xd2,0x7f,0xec, + 0xec,0x96,0x09,0x57,0xb0,0x96,0x29,0x8b, + 0x2e,0x26,0x91,0xf0,0x49,0x33,0x42,0x18, + 0xdd,0x4b,0x65,0x5a,0xd4,0x15,0x3d,0x33, + } + }, + { 0x800820d, { + 0x68,0x98,0x83,0xcd,0x22,0x0d,0xdd,0x59, + 0x73,0x2c,0x5b,0x37,0x1f,0x84,0x0e,0x67, + 0x96,0x43,0x83,0x0c,0x46,0x44,0xab,0x7c, + 0x7b,0x65,0x9e,0x57,0xb5,0x90,0x4b,0x0e, + } + }, + { 0x8301025, { + 0xe4,0x7d,0xdb,0x1e,0x14,0xb4,0x5e,0x36, + 0x8f,0x3e,0x48,0x88,0x3c,0x6d,0x76,0xa1, + 0x59,0xc6,0xc0,0x72,0x42,0xdf,0x6c,0x30, + 0x6f,0x0b,0x28,0x16,0x61,0xfc,0x79,0x77, + } + }, + { 0x8301055, { + 0x81,0x7b,0x99,0x1b,0xae,0x2d,0x4f,0x9a, + 0xef,0x13,0xce,0xb5,0x10,0xaf,0x6a,0xea, + 0xe5,0xb0,0x64,0x98,0x10,0x68,0x34,0x3b, + 0x9d,0x7a,0xd6,0x22,0x77,0x5f,0xb3,0x5b, + } + }, + { 0x8301072, { + 0xcf,0x76,0xa7,0x1a,0x49,0xdf,0x2a,0x5e, + 0x9e,0x40,0x70,0xe5,0xdd,0x8a,0xa8,0x28, + 0x20,0xdc,0x91,0xd8,0x2c,0xa6,0xa0,0xb1, + 0x2d,0x22,0x26,0x94,0x4b,0x40,0x85,0x30, + } + }, + { 0x830107a, { + 0x2a,0x65,0x8c,0x1a,0x5e,0x07,0x21,0x72, + 0xdf,0x90,0xa6,0x51,0x37,0xd3,0x4b,0x34, + 0xc4,0xda,0x03,0xe1,0x8a,0x6c,0xfb,0x20, + 0x04,0xb2,0x81,0x05,0xd4,0x87,0xf4,0x0a, + } + }, + { 0x830107b, { + 0xb3,0x43,0x13,0x63,0x56,0xc1,0x39,0xad, + 0x10,0xa6,0x2b,0xcc,0x02,0xe6,0x76,0x2a, + 0x1e,0x39,0x58,0x3e,0x23,0x6e,0xa4,0x04, + 
0x95,0xea,0xf9,0x6d,0xc2,0x8a,0x13,0x19, + } + }, + { 0x830107c, { + 0x21,0x64,0xde,0xfb,0x9f,0x68,0x96,0x47, + 0x70,0x5c,0xe2,0x8f,0x18,0x52,0x6a,0xac, + 0xa4,0xd2,0x2e,0xe0,0xde,0x68,0x66,0xc3, + 0xeb,0x1e,0xd3,0x3f,0xbc,0x51,0x1d,0x38, + } + }, + { 0x860010d, { + 0x86,0xb6,0x15,0x83,0xbc,0x3b,0x9c,0xe0, + 0xb3,0xef,0x1d,0x99,0x84,0x35,0x15,0xf7, + 0x7c,0x2a,0xc6,0x42,0xdb,0x73,0x07,0x5c, + 0x7d,0xc3,0x02,0xb5,0x43,0x06,0x5e,0xf8, + } + }, + { 0x8608108, { + 0x14,0xfe,0x57,0x86,0x49,0xc8,0x68,0xe2, + 0x11,0xa3,0xcb,0x6e,0xff,0x6e,0xd5,0x38, + 0xfe,0x89,0x1a,0xe0,0x67,0xbf,0xc4,0xcc, + 0x1b,0x9f,0x84,0x77,0x2b,0x9f,0xaa,0xbd, + } + }, + { 0x8701034, { + 0xc3,0x14,0x09,0xa8,0x9c,0x3f,0x8d,0x83, + 0x9b,0x4c,0xa5,0xb7,0x64,0x8b,0x91,0x5d, + 0x85,0x6a,0x39,0x26,0x1e,0x14,0x41,0xa8, + 0x75,0xea,0xa6,0xf9,0xc9,0xd1,0xea,0x2b, + } + }, + { 0x8a00008, { + 0xd7,0x2a,0x93,0xdc,0x05,0x2f,0xa5,0x6e, + 0x0c,0x61,0x2c,0x07,0x9f,0x38,0xe9,0x8e, + 0xef,0x7d,0x2a,0x05,0x4d,0x56,0xaf,0x72, + 0xe7,0x56,0x47,0x6e,0x60,0x27,0xd5,0x8c, + } + }, + { 0x8a0000a, { + 0x73,0x31,0x26,0x22,0xd4,0xf9,0xee,0x3c, + 0x07,0x06,0xe7,0xb9,0xad,0xd8,0x72,0x44, + 0x33,0x31,0xaa,0x7d,0xc3,0x67,0x0e,0xdb, + 0x47,0xb5,0xaa,0xbc,0xf5,0xbb,0xd9,0x20, + } + }, + { 0xa00104c, { + 0x3c,0x8a,0xfe,0x04,0x62,0xd8,0x6d,0xbe, + 0xa7,0x14,0x28,0x64,0x75,0xc0,0xa3,0x76, + 0xb7,0x92,0x0b,0x97,0x0a,0x8e,0x9c,0x5b, + 0x1b,0xc8,0x9d,0x3a,0x1e,0x81,0x3d,0x3b, + } + }, + { 0xa00104e, { + 0xc4,0x35,0x82,0x67,0xd2,0x86,0xe5,0xb2, + 0xfd,0x69,0x12,0x38,0xc8,0x77,0xba,0xe0, + 0x70,0xf9,0x77,0x89,0x10,0xa6,0x74,0x4e, + 0x56,0x58,0x13,0xf5,0x84,0x70,0x28,0x0b, + } + }, + { 0xa001053, { + 0x92,0x0e,0xf4,0x69,0x10,0x3b,0xf9,0x9d, + 0x31,0x1b,0xa6,0x99,0x08,0x7d,0xd7,0x25, + 0x7e,0x1e,0x89,0xba,0x35,0x8d,0xac,0xcb, + 0x3a,0xb4,0xdf,0x58,0x12,0xcf,0xc0,0xc3, + } + }, + { 0xa001058, { + 0x33,0x7d,0xa9,0xb5,0x4e,0x62,0x13,0x36, + 0xef,0x66,0xc9,0xbd,0x0a,0xa6,0x3b,0x19, + 0xcb,0xf5,0xc2,0xc3,0x55,0x47,0x20,0xec, + 
0x1f,0x7b,0xa1,0x44,0x0e,0x8e,0xa4,0xb2, + } + }, + { 0xa001075, { + 0x39,0x02,0x82,0xd0,0x7c,0x26,0x43,0xe9, + 0x26,0xa3,0xd9,0x96,0xf7,0x30,0x13,0x0a, + 0x8a,0x0e,0xac,0xe7,0x1d,0xdc,0xe2,0x0f, + 0xcb,0x9e,0x8d,0xbc,0xd2,0xa2,0x44,0xe0, + } + }, + { 0xa001078, { + 0x2d,0x67,0xc7,0x35,0xca,0xef,0x2f,0x25, + 0x4c,0x45,0x93,0x3f,0x36,0x01,0x8c,0xce, + 0xa8,0x5b,0x07,0xd3,0xc1,0x35,0x3c,0x04, + 0x20,0xa2,0xfc,0xdc,0xe6,0xce,0x26,0x3e, + } + }, + { 0xa001079, { + 0x43,0xe2,0x05,0x9c,0xfd,0xb7,0x5b,0xeb, + 0x5b,0xe9,0xeb,0x3b,0x96,0xf4,0xe4,0x93, + 0x73,0x45,0x3e,0xac,0x8d,0x3b,0xe4,0xdb, + 0x10,0x31,0xc1,0xe4,0xa2,0xd0,0x5a,0x8a, + } + }, + { 0xa00107a, { + 0x5f,0x92,0xca,0xff,0xc3,0x59,0x22,0x5f, + 0x02,0xa0,0x91,0x3b,0x4a,0x45,0x10,0xfd, + 0x19,0xe1,0x8a,0x6d,0x9a,0x92,0xc1,0x3f, + 0x75,0x78,0xac,0x78,0x03,0x1d,0xdb,0x18, + } + }, + { 0xa001143, { + 0x56,0xca,0xf7,0x43,0x8a,0x4c,0x46,0x80, + 0xec,0xde,0xe5,0x9c,0x50,0x84,0x9a,0x42, + 0x27,0xe5,0x51,0x84,0x8f,0x19,0xc0,0x8d, + 0x0c,0x25,0xb4,0xb0,0x8f,0x10,0xf3,0xf8, + } + }, + { 0xa001144, { + 0x42,0xd5,0x9b,0xa7,0xd6,0x15,0x29,0x41, + 0x61,0xc4,0x72,0x3f,0xf3,0x06,0x78,0x4b, + 0x65,0xf3,0x0e,0xfa,0x9c,0x87,0xde,0x25, + 0xbd,0xb3,0x9a,0xf4,0x75,0x13,0x53,0xdc, + } + }, + { 0xa00115d, { + 0xd4,0xc4,0x49,0x36,0x89,0x0b,0x47,0xdd, + 0xfb,0x2f,0x88,0x3b,0x5f,0xf2,0x8e,0x75, + 0xc6,0x6c,0x37,0x5a,0x90,0x25,0x94,0x3e, + 0x36,0x9c,0xae,0x02,0x38,0x6c,0xf5,0x05, + } + }, + { 0xa001173, { + 0x28,0xbb,0x9b,0xd1,0xa0,0xa0,0x7e,0x3a, + 0x59,0x20,0xc0,0xa9,0xb2,0x5c,0xc3,0x35, + 0x53,0x89,0xe1,0x4c,0x93,0x2f,0x1d,0xc3, + 0xe5,0xf7,0xf3,0xc8,0x9b,0x61,0xaa,0x9e, + } + }, + { 0xa0011a8, { + 0x97,0xc6,0x16,0x65,0x99,0xa4,0x85,0x3b, + 0xf6,0xce,0xaa,0x49,0x4a,0x3a,0xc5,0xb6, + 0x78,0x25,0xbc,0x53,0xaf,0x5d,0xcf,0xf4, + 0x23,0x12,0xbb,0xb1,0xbc,0x8a,0x02,0x2e, + } + }, + { 0xa0011ce, { + 0xcf,0x1c,0x90,0xa3,0x85,0x0a,0xbf,0x71, + 0x94,0x0e,0x80,0x86,0x85,0x4f,0xd7,0x86, + 0xae,0x38,0x23,0x28,0x2b,0x35,0x9b,0x4e, + 
0xfe,0xb8,0xcd,0x3d,0x3d,0x39,0xc9,0x6a, + } + }, + { 0xa0011d1, { + 0xdf,0x0e,0xca,0xde,0xf6,0xce,0x5c,0x1e, + 0x4c,0xec,0xd7,0x71,0x83,0xcc,0xa8,0x09, + 0xc7,0xc5,0xfe,0xb2,0xf7,0x05,0xd2,0xc5, + 0x12,0xdd,0xe4,0xf3,0x92,0x1c,0x3d,0xb8, + } + }, + { 0xa0011d3, { + 0x91,0xe6,0x10,0xd7,0x57,0xb0,0x95,0x0b, + 0x9a,0x24,0xee,0xf7,0xcf,0x56,0xc1,0xa6, + 0x4a,0x52,0x7d,0x5f,0x9f,0xdf,0xf6,0x00, + 0x65,0xf7,0xea,0xe8,0x2a,0x88,0xe2,0x26, + } + }, + { 0xa0011d5, { + 0xed,0x69,0x89,0xf4,0xeb,0x64,0xc2,0x13, + 0xe0,0x51,0x1f,0x03,0x26,0x52,0x7d,0xb7, + 0x93,0x5d,0x65,0xca,0xb8,0x12,0x1d,0x62, + 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, + } + }, + { 0xa001223, { + 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, + 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, + 0x83,0x75,0x94,0xdd,0xeb,0x7e,0xb7,0x15, + 0x8e,0x3b,0x50,0x29,0x8a,0x9c,0xcc,0x45, + } + }, + { 0xa001224, { + 0x0e,0x0c,0xdf,0xb4,0x89,0xee,0x35,0x25, + 0xdd,0x9e,0xdb,0xc0,0x69,0x83,0x0a,0xad, + 0x26,0xa9,0xaa,0x9d,0xfc,0x3c,0xea,0xf9, + 0x6c,0xdc,0xd5,0x6d,0x8b,0x6e,0x85,0x4a, + } + }, + { 0xa001227, { + 0xab,0xc6,0x00,0x69,0x4b,0x50,0x87,0xad, + 0x5f,0x0e,0x8b,0xea,0x57,0x38,0xce,0x1d, + 0x0f,0x75,0x26,0x02,0xf6,0xd6,0x96,0xe9, + 0x87,0xb9,0xd6,0x20,0x27,0x7c,0xd2,0xe0, + } + }, + { 0xa001229, { + 0x7f,0x49,0x49,0x48,0x46,0xa5,0x50,0xa6, + 0x28,0x89,0x98,0xe2,0x9e,0xb4,0x7f,0x75, + 0x33,0xa7,0x04,0x02,0xe4,0x82,0xbf,0xb4, + 0xa5,0x3a,0xba,0x24,0x8d,0x31,0x10,0x1d, + } + }, + { 0xa00122e, { + 0x56,0x94,0xa9,0x5d,0x06,0x68,0xfe,0xaf, + 0xdf,0x7a,0xff,0x2d,0xdf,0x74,0x0f,0x15, + 0x66,0xfb,0x00,0xb5,0x51,0x97,0x9b,0xfa, + 0xcb,0x79,0x85,0x46,0x25,0xb4,0xd2,0x10, + } + }, + { 0xa001231, { + 0x0b,0x46,0xa5,0xfc,0x18,0x15,0xa0,0x9e, + 0xa6,0xdc,0xb7,0xff,0x17,0xf7,0x30,0x64, + 0xd4,0xda,0x9e,0x1b,0xc3,0xfc,0x02,0x3b, + 0xe2,0xc6,0x0e,0x41,0x54,0xb5,0x18,0xdd, + } + }, + { 0xa001234, { + 0x88,0x8d,0xed,0xab,0xb5,0xbd,0x4e,0xf7, + 0x7f,0xd4,0x0e,0x95,0x34,0x91,0xff,0xcc, + 0xfb,0x2a,0xcd,0xf7,0xd5,0xdb,0x4c,0x9b, + 
0xd6,0x2e,0x73,0x50,0x8f,0x83,0x79,0x1a, + } + }, + { 0xa001236, { + 0x3d,0x30,0x00,0xb9,0x71,0xba,0x87,0x78, + 0xa8,0x43,0x55,0xc4,0x26,0x59,0xcf,0x9d, + 0x93,0xce,0x64,0x0e,0x8b,0x72,0x11,0x8b, + 0xa3,0x8f,0x51,0xe9,0xca,0x98,0xaa,0x25, + } + }, + { 0xa001238, { + 0x72,0xf7,0x4b,0x0c,0x7d,0x58,0x65,0xcc, + 0x00,0xcc,0x57,0x16,0x68,0x16,0xf8,0x2a, + 0x1b,0xb3,0x8b,0xe1,0xb6,0x83,0x8c,0x7e, + 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, + } + }, + { 0xa00820c, { + 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, + 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, + 0xf1,0x8c,0x88,0x45,0xd7,0x82,0x80,0xd1, + 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, + } + }, + { 0xa10113e, { + 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, + 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, + 0x15,0xe3,0x3f,0x4b,0x1d,0x0d,0x0a,0xd5, + 0xfa,0x90,0xc4,0xed,0x9d,0x90,0xaf,0x53, + } + }, + { 0xa101144, { + 0xb3,0x0b,0x26,0x9a,0xf8,0x7c,0x02,0x26, + 0x35,0x84,0x53,0xa4,0xd3,0x2c,0x7c,0x09, + 0x68,0x7b,0x96,0xb6,0x93,0xef,0xde,0xbc, + 0xfd,0x4b,0x15,0xd2,0x81,0xd3,0x51,0x47, + } + }, + { 0xa101148, { + 0x20,0xd5,0x6f,0x40,0x4a,0xf6,0x48,0x90, + 0xc2,0x93,0x9a,0xc2,0xfd,0xac,0xef,0x4f, + 0xfa,0xc0,0x3d,0x92,0x3c,0x6d,0x01,0x08, + 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, + } + }, + { 0xa10123e, { + 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, + 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, + 0x1d,0x13,0x53,0x63,0xfe,0x42,0x6f,0xfc, + 0x19,0x0f,0xf1,0xfc,0xa7,0xdd,0x89,0x1b, + } + }, + { 0xa101244, { + 0x71,0x56,0xb5,0x9f,0x21,0xbf,0xb3,0x3c, + 0x8c,0xd7,0x36,0xd0,0x34,0x52,0x1b,0xb1, + 0x46,0x2f,0x04,0xf0,0x37,0xd8,0x1e,0x72, + 0x24,0xa2,0x80,0x84,0x83,0x65,0x84,0xc0, + } + }, + { 0xa101248, { + 0xed,0x3b,0x95,0xa6,0x68,0xa7,0x77,0x3e, + 0xfc,0x17,0x26,0xe2,0x7b,0xd5,0x56,0x22, + 0x2c,0x1d,0xef,0xeb,0x56,0xdd,0xba,0x6e, + 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, + } + }, + { 0xa108108, { + 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, + 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, + 0xf5,0xd4,0x3f,0x7b,0x14,0xd5,0x60,0x2c, + 
0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, + } + }, + { 0xa20102d, { + 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, + 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, + 0x8b,0x89,0x2f,0xb5,0xbb,0x82,0xef,0x23, + 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, + } + }, + { 0xa201210, { + 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, + 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, + 0x6d,0x3d,0x0e,0x6b,0xa7,0xac,0xe3,0x68, + 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, + } + }, + { 0xa404107, { + 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, + 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, + 0x8b,0x0d,0x9f,0xf9,0x3a,0xdf,0xc6,0x81, + 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, + } + }, + { 0xa500011, { + 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, + 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, + 0xd7,0x5b,0x65,0x3a,0x7d,0xab,0xdf,0xa2, + 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, + } + }, + { 0xa601209, { + 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, + 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, + 0x15,0x86,0xcc,0x5d,0x97,0x0f,0xc0,0x46, + 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, + } + }, + { 0xa704107, { + 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, + 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, + 0x2a,0xad,0x8e,0x6b,0xea,0x9b,0xb7,0xc2, + 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, + } + }, + { 0xa705206, { + 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, + 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, + 0x67,0x6f,0x04,0x18,0xae,0x20,0x87,0x4b, + 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, + } + }, + { 0xa708007, { + 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, + 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, + 0x07,0xaa,0x3a,0xe0,0x57,0x13,0x72,0x80, + 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, + } + }, + { 0xa70c005, { + 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, + 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, + 0x1f,0x1f,0xf1,0x97,0xeb,0xfe,0x56,0x55, + 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, + } + }, + { 0xaa00116, { + 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, + 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, + 0xfe,0x1d,0x5e,0x65,0xc7,0xaa,0x92,0x4d, + 
0x91,0xee,0x76,0xbb,0x4c,0x66,0x78,0xc9, + } + }, + { 0xaa00212, { + 0xbd,0x57,0x5d,0x0a,0x0a,0x30,0xc1,0x75, + 0x95,0x58,0x5e,0x93,0x02,0x28,0x43,0x71, + 0xed,0x42,0x29,0xc8,0xec,0x34,0x2b,0xb2, + 0x1a,0x65,0x4b,0xfe,0x07,0x0f,0x34,0xa1, + } + }, + { 0xaa00213, { + 0xed,0x58,0xb7,0x76,0x81,0x7f,0xd9,0x3a, + 0x1a,0xff,0x8b,0x34,0xb8,0x4a,0x99,0x0f, + 0x28,0x49,0x6c,0x56,0x2b,0xdc,0xb7,0xed, + 0x96,0xd5,0x9d,0xc1,0x7a,0xd4,0x51,0x9b, + } + }, + { 0xaa00215, { + 0x55,0xd3,0x28,0xcb,0x87,0xa9,0x32,0xe9, + 0x4e,0x85,0x4b,0x7c,0x6b,0xd5,0x7c,0xd4, + 0x1b,0x51,0x71,0x3a,0x0e,0x0b,0xdc,0x9b, + 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, + } + }, +}; From 39ec9eaaa165d297d008d1fa385748430bd18e4d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2025 11:53:16 -0800 Subject: [PATCH 069/503] coredump: Only sort VMAs when core_sort_vma sysctl is set The sorting of VMAs by size in commit 7d442a33bfe8 ("binfmt_elf: Dump smaller VMAs first in ELF cores") breaks elfutils[1]. Instead, sort based on the setting of the new sysctl, core_sort_vma, which defaults to 0, no sorting. Reported-by: Michael Stapelberg Closes: https://lore.kernel.org/all/20250218085407.61126-1-michael@stapelberg.de/ [1] Fixes: 7d442a33bfe8 ("binfmt_elf: Dump smaller VMAs first in ELF cores") Signed-off-by: Kees Cook --- Documentation/admin-guide/sysctl/kernel.rst | 11 +++++++++++ fs/coredump.c | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a43b78b4b6464..dd49a89a62d35 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -212,6 +212,17 @@ pid>/``). This value defaults to 0. +core_sort_vma +============= + +The default coredump writes VMAs in address order. By setting +``core_sort_vma`` to 1, VMAs will be written from smallest size +to largest size. 
This is known to break at least elfutils, but +can be handy when dealing with very large (and truncated) +coredumps where the more useful debugging details are included +in the smaller VMAs. + + core_uses_pid ============= diff --git a/fs/coredump.c b/fs/coredump.c index 591700e1b2ce6..4375c70144d0a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -63,6 +63,7 @@ static void free_vma_snapshot(struct coredump_params *cprm); static int core_uses_pid; static unsigned int core_pipe_limit; +static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; @@ -1026,6 +1027,15 @@ static const struct ctl_table coredump_sysctls[] = { .extra1 = (unsigned int *)&core_file_note_size_min, .extra2 = (unsigned int *)&core_file_note_size_max, }, + { + .procname = "core_sort_vma", + .data = &core_sort_vma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_coredump_sysctls(void) @@ -1256,8 +1266,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm) cprm->vma_data_size += m->dump_size; } - sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), - cmp_vma_size, NULL); + if (core_sort_vma) + sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), + cmp_vma_size, NULL); return true; } From 8c1624b63a7d24142a2bbc3a5ee7e95f004ea36e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 20 Feb 2025 13:18:30 +0200 Subject: [PATCH 070/503] nvme-tcp: fix possible UAF in nvme_tcp_poll nvme_tcp_poll() may race with the send path error handler because it may complete the request while it is actively being polled for completion, resulting in a UAF panic [1]: We should make sure to stop polling when we see an error when trying to read from the socket. 
Hence make sure to propagate the error so that the block layer breaks the polling cycle. [1]: -- [35665.692310] nvme nvme2: failed to send request -13 [35665.702265] nvme nvme2: unsupported pdu type (3) [35665.702272] BUG: kernel NULL pointer dereference, address: 0000000000000000 [35665.702542] nvme nvme2: queue 1 receive failed: -22 [35665.703209] #PF: supervisor write access in kernel mode [35665.703213] #PF: error_code(0x0002) - not-present page [35665.703214] PGD 8000003801cce067 P4D 8000003801cce067 PUD 37e6f79067 PMD 0 [35665.703220] Oops: 0002 [#1] SMP PTI [35665.703658] nvme nvme2: starting error recovery [35665.705809] Hardware name: Inspur aaabbb/YZMB-00882-104, BIOS 4.1.26 09/22/2022 [35665.705812] Workqueue: kblockd blk_mq_requeue_work [35665.709172] RIP: 0010:_raw_spin_lock+0xc/0x30 [35665.715788] Call Trace: [35665.716201] [35665.716613] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717049] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717457] ? blk_mq_request_bypass_insert+0x2c/0xb0 [35665.717950] ? __die_body.cold+0x8/0xd [35665.718361] ? page_fault_oops+0xac/0x140 [35665.718749] ? blk_mq_start_request+0x30/0xf0 [35665.719144] ? nvme_tcp_queue_rq+0xc7/0x170 [nvme_tcp] [35665.719547] ? exc_page_fault+0x62/0x130 [35665.719938] ? asm_exc_page_fault+0x22/0x30 [35665.720333] ? _raw_spin_lock+0xc/0x30 [35665.720723] blk_mq_request_bypass_insert+0x2c/0xb0 [35665.721101] blk_mq_requeue_work+0xa5/0x180 [35665.721451] process_one_work+0x1e8/0x390 [35665.721809] worker_thread+0x53/0x3d0 [35665.722159] ? process_one_work+0x390/0x390 [35665.722501] kthread+0x124/0x150 [35665.722849] ? 
set_kthread_struct+0x50/0x50 [35665.723182] ret_from_fork+0x1f/0x30 Reported-by: Zhang Guanghui Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a9131c95a3da..8c14018201dbd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2699,6 +2699,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + int ret; if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; @@ -2706,9 +2707,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); - nvme_tcp_try_recv(queue); + ret = nvme_tcp_try_recv(queue); clear_bit(NVME_TCP_Q_POLLING, &queue->flags); - return queue->nr_cqe; + return ret < 0 ? ret : queue->nr_cqe; } static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) From 6a3572e10f740acd48e2713ef37e92186a3ce5e8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:43 +0800 Subject: [PATCH 071/503] nvme-pci: clean up CMBMSC when registering CMB fails CMB decoding should get disabled when the CMB block isn't successfully registered to P2P DMA subsystem. Clean up the CMBMSC register in this error handling codepath to disable CMB decoding (and CMBLOC/CMBSZ registers). 
Signed-off-by: Icenowy Zheng Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 950289405ef28..218506e3dabea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2003,6 +2003,7 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); + hi_lo_writeq(0, dev->bar + NVME_REG_CMBMSC); return; } From 56cf7ef0d490b28fad8f8629fc135c5ab7c9f54e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:44 +0800 Subject: [PATCH 072/503] nvme-pci: skip CMB blocks incompatible with PCI P2P DMA The PCI P2PDMA code will register the CMB block to the memory hot-plugging subsystem, which have an alignment requirement. Memory blocks that do not satisfy this alignment requirement (usually 2MB) will lead to a WARNING from memory hotplugging. Verify the CMB block's address and size against the alignment and only try to send CMB blocks compatible with it to prevent this warning. Tested on Intel DC D4502 SSD, which has a 512K CMB block that is too small for memory hotplugging (thus PCI P2PDMA). Signed-off-by: Icenowy Zheng Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 218506e3dabea..640590b217282 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1982,6 +1982,18 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (offset > bar_size) return; + /* + * Controllers may support a CMB size larger than their BAR, for + * example, due to being behind a bridge. 
Reduce the CMB to the + * reported size of the BAR + */ + size = min(size, bar_size - offset); + + if (!IS_ALIGNED(size, memremap_compat_align()) || + !IS_ALIGNED(pci_resource_start(pdev, bar), + memremap_compat_align())) + return; + /* * Tell the controller about the host side address mapping the CMB, * and enable CMB decoding for the NVMe 1.4+ scheme: @@ -1992,14 +2004,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) dev->bar + NVME_REG_CMBMSC); } - /* - * Controllers may support a CMB size larger than their BAR, - * for example, due to being behind a bridge. Reduce the CMB to - * the reported size of the BAR - */ - if (size > bar_size - offset) - size = bar_size - offset; - if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); From f5be37ca2c99fc764408ceeeaf941bea062cdc9b Mon Sep 17 00:00:00 2001 From: Andras Sebok Date: Mon, 24 Feb 2025 10:03:56 +0100 Subject: [PATCH 073/503] dt-bindings: input/touchscreen: imagis: add compatible for ist3038h IST3038H is a touchscreen IC which seems mostly compatible with IST3038C except that it reports a different chip ID value. 
Signed-off-by: Andras Sebok Link: https://lore.kernel.org/r/20250224090354.102903-4-sebokandris2009@gmail.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml index e24cbd9609933..bd8ede3a4ad89 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml @@ -19,6 +19,7 @@ properties: - imagis,ist3038 - imagis,ist3038b - imagis,ist3038c + - imagis,ist3038h reg: maxItems: 1 From 5797c04400ee117bfe459ff1e468d0ea38054ab4 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 23 Jan 2025 15:20:02 +0300 Subject: [PATCH 074/503] hwmon: (peci/dimmtemp) Do not provide fake thresholds data When an Icelake or Sapphire Rapids CPU isn't providing the maximum and critical thresholds for particular DIMM the driver should return an error to the userspace instead of giving it stale (best case) or wrong (the structure contains all zeros after kzalloc() call) data. The issue can be reproduced by binding the peci driver while the host is fully booted and idle, this makes PECI interaction unreliable enough. 
Fixes: 73bc1b885dae ("hwmon: peci: Add dimmtemp driver") Fixes: 621995b6d795 ("hwmon: (peci/dimmtemp) Add Sapphire Rapids support") Cc: stable@vger.kernel.org Signed-off-by: Paul Fertser Reviewed-by: Iwona Winiarska Link: https://lore.kernel.org/r/20250123122003.6010-1-fercerpav@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/dimmtemp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c index d6762259dd69c..fbe82d9852e01 100644 --- a/drivers/hwmon/peci/dimmtemp.c +++ b/drivers/hwmon/peci/dimmtemp.c @@ -127,8 +127,6 @@ static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no) return 0; ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank, &data); - if (ret == -ENODATA) /* Use default or previous value */ - return 0; if (ret) return ret; @@ -509,11 +507,11 @@ read_thresholds_icx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd4, &reg_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd0, &reg_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 224e0: IMC 0 channel 0 -> rank 0 @@ -546,11 +544,11 @@ read_thresholds_spr(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd4, &reg_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd0, &reg_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 219a8: IMC 0 channel 0 -> rank 0 From 83a4a5a82a154277dfe61d135a445901cd6d0e6f Mon Sep 17 00:00:00 2001 From: Andras Sebok Date: Mon, 24 Feb 2025 10:03:54 +0100 Subject:
[PATCH 075/503] Input: imagis - add support for imagis IST3038H Add support for imagis IST3038H, which seems mostly compatible with IST3038C except that it reports a different chip ID value. Tested on samsung,j5y17lte. Signed-off-by: Andras Sebok Link: https://lore.kernel.org/r/20250224090354.102903-2-sebokandris2009@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imagis.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index abeae9102323c..3c8bbe284b731 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -22,6 +22,7 @@ #define IST3032C_WHOAMI 0x32c #define IST3038C_WHOAMI 0x38c +#define IST3038H_WHOAMI 0x38d #define IST3038B_REG_CHIPID 0x30 #define IST3038B_WHOAMI 0x30380b @@ -428,11 +429,19 @@ static const struct imagis_properties imagis_3038c_data = { .protocol_b = true, }; +static const struct imagis_properties imagis_3038h_data = { + .interrupt_msg_cmd = IST3038C_REG_INTR_MESSAGE, + .touch_coord_cmd = IST3038C_REG_TOUCH_COORD, + .whoami_cmd = IST3038C_REG_CHIPID, + .whoami_val = IST3038H_WHOAMI, +}; + static const struct of_device_id imagis_of_match[] = { { .compatible = "imagis,ist3032c", .data = &imagis_3032c_data }, { .compatible = "imagis,ist3038", .data = &imagis_3038_data }, { .compatible = "imagis,ist3038b", .data = &imagis_3038b_data }, { .compatible = "imagis,ist3038c", .data = &imagis_3038c_data }, + { .compatible = "imagis,ist3038h", .data = &imagis_3038h_data }, { }, }; MODULE_DEVICE_TABLE(of, imagis_of_match); From 5bab1ae5a375ccde25ac9a142ea933a5d3bdf38d Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 3 Jan 2025 10:21:35 +0100 Subject: [PATCH 076/503] Input: goodix-berlin - fix comment referencing wrong regulator In the statement above AVDD gets enabled, and not IOVDD, so fix this copy-paste mistake. 
Fixes: 44362279bdd4 ("Input: add core support for Goodix Berlin Touchscreen IC") Reported-by: Jens Reidel Signed-off-by: Luca Weiss Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250103-goodix-berlin-fixes-v1-1-b014737b08b2@fairphone.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_berlin_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 3fc03cf0ca23f..e273fb8edc6b9 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -263,7 +263,7 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) goto err_iovdd_disable; } - /* Vendor waits 15ms for IOVDD to settle */ + /* Vendor waits 15ms for AVDD to settle */ usleep_range(15000, 15100); gpiod_set_value_cansleep(cd->reset_gpio, 0); From 3b0011059334a1cf554c2c1f67d7a7b822d8238a Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 3 Jan 2025 10:21:36 +0100 Subject: [PATCH 077/503] Input: goodix-berlin - fix vddio regulator references As per dt-bindings the property is called vddio-supply, so use the correct name in the driver instead of iovdd. The datasheet also calls the supply 'VDDIO'. 
Fixes: 44362279bdd4 ("Input: add core support for Goodix Berlin Touchscreen IC") Cc: stable@vger.kernel.org Signed-off-by: Luca Weiss Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250103-goodix-berlin-fixes-v1-2-b014737b08b2@fairphone.com Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/goodix_berlin_core.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index e273fb8edc6b9..7f8cfdd106fae 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -165,7 +165,7 @@ struct goodix_berlin_core { struct device *dev; struct regmap *regmap; struct regulator *avdd; - struct regulator *iovdd; + struct regulator *vddio; struct gpio_desc *reset_gpio; struct touchscreen_properties props; struct goodix_berlin_fw_version fw_version; @@ -248,19 +248,19 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) { int error; - error = regulator_enable(cd->iovdd); + error = regulator_enable(cd->vddio); if (error) { - dev_err(cd->dev, "Failed to enable iovdd: %d\n", error); + dev_err(cd->dev, "Failed to enable vddio: %d\n", error); return error; } - /* Vendor waits 3ms for IOVDD to settle */ + /* Vendor waits 3ms for VDDIO to settle */ usleep_range(3000, 3100); error = regulator_enable(cd->avdd); if (error) { dev_err(cd->dev, "Failed to enable avdd: %d\n", error); - goto err_iovdd_disable; + goto err_vddio_disable; } /* Vendor waits 15ms for AVDD to settle */ @@ -283,8 +283,8 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) err_dev_reset: gpiod_set_value_cansleep(cd->reset_gpio, 1); regulator_disable(cd->avdd); -err_iovdd_disable: - regulator_disable(cd->iovdd); +err_vddio_disable: + regulator_disable(cd->vddio); return error; } @@ -292,7 +292,7 @@ static void goodix_berlin_power_off(struct goodix_berlin_core *cd) { 
gpiod_set_value_cansleep(cd->reset_gpio, 1); regulator_disable(cd->avdd); - regulator_disable(cd->iovdd); + regulator_disable(cd->vddio); } static int goodix_berlin_read_version(struct goodix_berlin_core *cd) @@ -744,10 +744,10 @@ int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id, return dev_err_probe(dev, PTR_ERR(cd->avdd), "Failed to request avdd regulator\n"); - cd->iovdd = devm_regulator_get(dev, "iovdd"); - if (IS_ERR(cd->iovdd)) - return dev_err_probe(dev, PTR_ERR(cd->iovdd), - "Failed to request iovdd regulator\n"); + cd->vddio = devm_regulator_get(dev, "vddio"); + if (IS_ERR(cd->vddio)) + return dev_err_probe(dev, PTR_ERR(cd->vddio), + "Failed to request vddio regulator\n"); error = goodix_berlin_power_on(cd); if (error) { From 4b90de5bc0f5a6d1151acd74c838275f9b7be3a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:52 -0800 Subject: [PATCH 078/503] xfs: reduce context switches for synchronous buffered I/O Currently all metadata I/O completions happen in the m_buf_workqueue workqueue. But for synchronous I/O (i.e. all buffer reads) there is no need for that, as there always is a caller in process context that is waiting for the I/O. Factor out the guts of xfs_buf_ioend into a separate helper and call it from xfs_buf_iowait to avoid an extra context switch to the workqueue. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15bb790359f81..dfc1849b3314b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1345,6 +1345,7 @@ xfs_buf_ioend_handle_error( resubmit: xfs_buf_ioerror(bp, 0); bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + reinit_completion(&bp->b_iowait); xfs_buf_submit(bp); return true; out_stale: @@ -1355,8 +1356,9 @@ xfs_buf_ioend_handle_error( return false; } -static void -xfs_buf_ioend( +/* returns false if the caller needs to resubmit the I/O, else true */ +static bool +__xfs_buf_ioend( struct xfs_buf *bp) { trace_xfs_buf_iodone(bp, _RET_IP_); @@ -1376,7 +1378,7 @@ xfs_buf_ioend( } if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) - return; + return false; /* clear the retry state */ bp->b_last_error = 0; @@ -1397,7 +1399,15 @@ xfs_buf_ioend( bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | _XBF_LOGRECOVERY); + return true; +} +static void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + if (!__xfs_buf_ioend(bp)) + return; if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); else @@ -1411,15 +1421,8 @@ xfs_buf_ioend_work( struct xfs_buf *bp = container_of(work, struct xfs_buf, b_ioend_work); - xfs_buf_ioend(bp); -} - -static void -xfs_buf_ioend_async( - struct xfs_buf *bp) -{ - INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + if (__xfs_buf_ioend(bp)) + xfs_buf_relse(bp); } void @@ -1491,7 +1494,13 @@ xfs_buf_bio_end_io( XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend_async(bp); + if (bp->b_flags & XBF_ASYNC) { + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + } else { + complete(&bp->b_iowait); + } + bio_put(bio); } @@ -1568,9 +1577,11 @@ xfs_buf_iowait( { 
ASSERT(!(bp->b_flags & XBF_ASYNC)); - trace_xfs_buf_iowait(bp, _RET_IP_); - wait_for_completion(&bp->b_iowait); - trace_xfs_buf_iowait_done(bp, _RET_IP_); + do { + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + } while (!__xfs_buf_ioend(bp)); return bp->b_error; } From efc5f7a9f3d887ce44b7610bc39388094b6f97d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:53 -0800 Subject: [PATCH 079/503] xfs: decouple buffer readahead from the normal buffer read path xfs_buf_readahead_map is the only caller of xfs_buf_read_map and thus _xfs_buf_read that is not synchronous. Split it from xfs_buf_read_map so that the asynchronous path is self-contained and the now purely synchronous xfs_buf_read_map / _xfs_buf_read implementation can be simplified. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 41 ++++++++++++++++++++-------------------- fs/xfs/xfs_buf.h | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_trace.h | 1 + 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dfc1849b3314b..4ea20483d5213 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -794,18 +794,13 @@ xfs_buf_get_map( int _xfs_buf_read( - struct xfs_buf *bp, - xfs_buf_flags_t flags) + struct xfs_buf *bp) { - ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - + bp->b_flags |= XBF_READ; xfs_buf_submit(bp); - if (flags & XBF_ASYNC) - return 0; return xfs_buf_iowait(bp); } @@ -857,6 +852,8 @@ xfs_buf_read_map( struct xfs_buf *bp; int error; + ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD))); + flags |= XBF_READ; *bpp = NULL; @@ -870,21 +867,11 @@ xfs_buf_read_map( /* Initiate the buffer read and 
wait. */ XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; - error = _xfs_buf_read(bp, flags); - - /* Readahead iodone already dropped the buffer, so exit. */ - if (flags & XBF_ASYNC) - return 0; + error = _xfs_buf_read(bp); } else { /* Buffer already read; all we need to do is check it. */ error = xfs_buf_reverify(bp, ops); - /* Readahead already finished; drop the buffer and exit. */ - if (flags & XBF_ASYNC) { - xfs_buf_relse(bp); - return 0; - } - /* We do not want read in the flags */ bp->b_flags &= ~XBF_READ; ASSERT(bp->b_ops != NULL || ops == NULL); @@ -936,6 +923,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { + const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD; struct xfs_buf *bp; /* @@ -945,9 +933,20 @@ xfs_buf_readahead_map( if (xfs_buftarg_is_mem(target)) return; - xfs_buf_read_map(target, map, nmaps, - XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops, - __this_address); + if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp)) + return; + trace_xfs_buf_readahead(bp, 0, _RET_IP_); + + if (bp->b_flags & XBF_DONE) { + xfs_buf_reverify(bp, ops); + xfs_buf_relse(bp); + return; + } + XFS_STATS_INC(target->bt_mount, xb_get_read); + bp->b_ops = ops; + bp->b_flags &= ~(XBF_WRITE | XBF_DONE); + bp->b_flags |= flags; + xfs_buf_submit(bp); } /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3b4ed42e11c01..2e747555ad3fa 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -291,7 +291,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); -int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); +int _xfs_buf_read(struct xfs_buf *bp); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b3c27dbccce86..2f76531842f83 100644 
--- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3380,7 +3380,7 @@ xlog_do_recover( */ xfs_buf_lock(bp); xfs_buf_hold(bp); - error = _xfs_buf_read(bp, XBF_READ); + error = _xfs_buf_read(bp); if (error) { if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b29462363b815..bfc2f12490224 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -593,6 +593,7 @@ DEFINE_EVENT(xfs_buf_flags_class, name, \ DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_readahead); TRACE_EVENT(xfs_buf_ioerror, TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), From 0d1120b9bbe48a2d119afe0dc64f9c0666745bc8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:54 -0800 Subject: [PATCH 080/503] xfs: remove most in-flight buffer accounting The buffer cache keeps a bt_io_count per-CPU counter to track all in-flight I/O, which is used to ensure no I/O is in flight when unmounting the file system. For most I/O we already keep track of inflight I/O at higher levels: - for synchronous I/O (xfs_buf_read/xfs_bwrite/xfs_buf_delwri_submit), the caller has a reference and waits for I/O completions using xfs_buf_iowait - for xfs_buf_delwri_submit_nowait the only caller (AIL writeback) tracks the log items that the buffer is attached to This leaves only xfs_buf_readahead_map as a submitter of asynchronous I/O that is not tracked by anything else. Replace the bt_io_count per-cpu counter with a more specific bt_readahead_count counter only tracking readahead I/O. This allows simply incrementing it when submitting readahead I/O and decrementing it when it completes, and thus simplifies xfs_buf_rele and removes the need for the XBF_NO_IOACCT flag and the XFS_BSTATE_IN_FLIGHT buffer state. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 90 ++++++++------------------------------------ fs/xfs/xfs_buf.h | 5 +-- fs/xfs/xfs_buf_mem.c | 2 +- fs/xfs/xfs_mount.c | 7 +--- fs/xfs/xfs_rtalloc.c | 2 +- 5 files changed, 20 insertions(+), 86 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4ea20483d5213..e161f3ab41087 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache; /* * Locking orders * - * xfs_buf_ioacct_inc: - * xfs_buf_ioacct_dec: - * b_sema (caller holds) - * b_lock - * * xfs_buf_stale: * b_sema (caller holds) * b_lock @@ -81,51 +76,6 @@ xfs_buf_vmap_len( return (bp->b_page_count * PAGE_SIZE); } -/* - * Bump the I/O in flight count on the buftarg if we haven't yet done so for - * this buffer. The count is incremented once per buffer (per hold cycle) - * because the corresponding decrement is deferred to buffer release. Buffers - * can undergo I/O multiple times in a hold-release cycle and per buffer I/O - * tracking adds unnecessary overhead. This is used for sychronization purposes - * with unmount (see xfs_buftarg_drain()), so all we really need is a count of - * in-flight buffers. - * - * Buffers that are never released (e.g., superblock, iclog buffers) must set - * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count - * never reaches zero and unmount hangs indefinitely. - */ -static inline void -xfs_buf_ioacct_inc( - struct xfs_buf *bp) -{ - if (bp->b_flags & XBF_NO_IOACCT) - return; - - ASSERT(bp->b_flags & XBF_ASYNC); - spin_lock(&bp->b_lock); - if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { - bp->b_state |= XFS_BSTATE_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); - } - spin_unlock(&bp->b_lock); -} - -/* - * Clear the in-flight state on a buffer about to be released to the LRU or - * freed and unaccount from the buftarg. 
- */ -static inline void -__xfs_buf_ioacct_dec( - struct xfs_buf *bp) -{ - lockdep_assert_held(&bp->b_lock); - - if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { - bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); - } -} - /* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer @@ -156,8 +106,6 @@ xfs_buf_stale( * status now to preserve accounting consistency. */ spin_lock(&bp->b_lock); - __xfs_buf_ioacct_dec(bp); - atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru))) @@ -946,6 +894,7 @@ xfs_buf_readahead_map( bp->b_ops = ops; bp->b_flags &= ~(XBF_WRITE | XBF_DONE); bp->b_flags |= flags; + percpu_counter_inc(&target->bt_readahead_count); xfs_buf_submit(bp); } @@ -1002,10 +951,12 @@ xfs_buf_get_uncached( struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + /* there are currently no valid flags for xfs_buf_get_uncached */ + ASSERT(flags == 0); + *bpp = NULL; - /* flags might contain irrelevant bits, pass only what we care about */ - error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); + error = _xfs_buf_alloc(target, &map, 1, flags, &bp); if (error) return error; @@ -1059,7 +1010,6 @@ xfs_buf_rele_uncached( spin_unlock(&bp->b_lock); return; } - __xfs_buf_ioacct_dec(bp); spin_unlock(&bp->b_lock); xfs_buf_free(bp); } @@ -1078,19 +1028,11 @@ xfs_buf_rele_cached( spin_lock(&bp->b_lock); ASSERT(bp->b_hold >= 1); if (bp->b_hold > 1) { - /* - * Drop the in-flight state if the buffer is already on the LRU - * and it holds the only reference. This is racy because we - * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT - * ensures the decrement occurs only once per-buf. 
- */ - if (--bp->b_hold == 1 && !list_empty(&bp->b_lru)) - __xfs_buf_ioacct_dec(bp); + bp->b_hold--; goto out_unlock; } /* we are asked to drop the last reference */ - __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU, keep the reference to the @@ -1370,6 +1312,8 @@ __xfs_buf_ioend( bp->b_ops->verify_read(bp); if (!bp->b_error) bp->b_flags |= XBF_DONE; + if (bp->b_flags & XBF_READ_AHEAD) + percpu_counter_dec(&bp->b_target->bt_readahead_count); } else { if (!bp->b_error) { bp->b_flags &= ~XBF_WRITE_FAIL; @@ -1658,9 +1602,6 @@ xfs_buf_submit( */ bp->b_error = 0; - if (bp->b_flags & XBF_ASYNC) - xfs_buf_ioacct_inc(bp); - if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) { xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); xfs_buf_ioend(bp); @@ -1786,9 +1727,8 @@ xfs_buftarg_wait( struct xfs_buftarg *btp) { /* - * First wait on the buftarg I/O count for all in-flight buffers to be - * released. This is critical as new buffers do not make the LRU until - * they are released. + * First wait for all in-flight readahead buffers to be released. This is + * critical as new buffers do not make the LRU until they are released. * * Next, flush the buffer workqueue to ensure all completion processing * has finished. Just waiting on buffer locks is not sufficient for @@ -1797,7 +1737,7 @@ xfs_buftarg_wait( * all reference counts have been dropped before we start walking the * LRU list. 
*/ - while (percpu_counter_sum(&btp->bt_io_count)) + while (percpu_counter_sum(&btp->bt_readahead_count)) delay(100); flush_workqueue(btp->bt_mount->m_buf_workqueue); } @@ -1914,8 +1854,8 @@ xfs_destroy_buftarg( struct xfs_buftarg *btp) { shrinker_free(btp->bt_shrinker); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); - percpu_counter_destroy(&btp->bt_io_count); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); + percpu_counter_destroy(&btp->bt_readahead_count); list_lru_destroy(&btp->bt_lru); } @@ -1969,7 +1909,7 @@ xfs_init_buftarg( if (list_lru_init(&btp->bt_lru)) return -ENOMEM; - if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL)) goto out_destroy_lru; btp->bt_shrinker = @@ -1983,7 +1923,7 @@ xfs_init_buftarg( return 0; out_destroy_io_count: - percpu_counter_destroy(&btp->bt_io_count); + percpu_counter_destroy(&btp->bt_readahead_count); out_destroy_lru: list_lru_destroy(&btp->bt_lru); return -ENOMEM; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 2e747555ad3fa..80e06eecaf56e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -27,7 +27,6 @@ struct xfs_buf; #define XBF_READ (1u << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1u << 1) /* buffer intended for writing to device */ #define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */ -#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */ #define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */ #define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */ @@ -58,7 +57,6 @@ typedef unsigned int xfs_buf_flags_t; { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ - { XBF_NO_IOACCT, "NO_IOACCT" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ @@ -77,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t; * Internal 
state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ -#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { struct rhashtable bc_hash; @@ -116,7 +113,7 @@ struct xfs_buftarg { struct shrinker *bt_shrinker; struct list_lru bt_lru; - struct percpu_counter bt_io_count; + struct percpu_counter bt_readahead_count; struct ratelimit_state bt_ioerror_rl; /* Atomic write unit values */ diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 07bebbfb16ee1..5b64a2b3b113f 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -117,7 +117,7 @@ xmbuf_free( struct xfs_buftarg *btp) { ASSERT(xfs_buftarg_is_mem(btp)); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); trace_xmbuf_free(btp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 477c5262cf912..b69356582b86f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -181,14 +181,11 @@ xfs_readsb( /* * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. Therefore, - * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count - * elevated. + * around at all times to optimize access to the superblock. 
*/ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), XBF_NO_IOACCT, &bp, - buf_ops); + BTOBB(sector_size), 0, &bp, buf_ops); if (error) { if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d8e6d073d64dc..57bef567e0116 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1407,7 +1407,7 @@ xfs_rtmount_readsb( /* m_blkbb_log is not set up yet */ error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR, - mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp, + mp->m_sb.sb_blocksize >> BBSHIFT, 0, &bp, &xfs_rtsb_buf_ops); if (error) { xfs_warn(mp, "rt sb validate failed with error %d.", error); From 9b47d37496e2669078c8616334e5a7200f91681a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:55 -0800 Subject: [PATCH 081/503] xfs: remove the XBF_STALE check from xfs_buf_rele_cached xfs_buf_stale already set b_lru_ref to 0, and thus prevents the buffer from moving to the LRU. Remove the duplicate check. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e161f3ab41087..5d560e9073f42 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -99,12 +99,6 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - /* - * Once the buffer is marked stale and unlocked, a subsequent lookup - * could reset b_flags. There is no guarantee that the buffer is - * unaccounted (released to LRU) before that occurs. Drop in-flight - * status now to preserve accounting consistency. 
- */ spin_lock(&bp->b_lock); atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && @@ -1033,7 +1027,7 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + if (atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU, keep the reference to the * buffer for the LRU and clear the (now stale) dispose list From 709329c48214ad2acf12eed1b5c0eb798e40a64c Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 22:59:34 -0800 Subject: [PATCH 082/503] Input: xpad - add support for ZOTAC Gaming Zone ZOTAC Gaming Zone is ZOTAC's 2024 handheld release. As it is common with these handhelds, it uses a hybrid USB device with an xpad endpoint, a keyboard endpoint, and a vendor-specific endpoint for RGB control et al. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-2-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 16493235bf9ee..29d7e6eb51cf0 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -349,6 +349,7 @@ static const struct xpad_device { { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, + { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, @@ -538,6 +539,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */ 
XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ + XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */ From 95a54a96f657fd069d2a9922b6c2d293a72a001f Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 23:00:29 -0800 Subject: [PATCH 083/503] Input: xpad - add support for TECNO Pocket Go TECNO Pocket Go is a kickstarter handheld by manufacturer TECNO Mobile. It poses a unique feature: it does not have a display. Instead, the handheld is essentially a pc in a controller. As customary, it has an xpad endpoint, a keyboard endpoint, and a vendor endpoint for its vendor software. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-3-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 29d7e6eb51cf0..79e1c8f1f99c2 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -382,6 +382,7 @@ static const struct xpad_device { { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, + { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, @@ -548,6 +549,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ + 
XPAD_XBOX360_VENDOR(0x2993), /* TECNO Mobile */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ From 659a7614dd72e2835ac0b220c2fa68fabd8d1df9 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 23:01:55 -0800 Subject: [PATCH 084/503] Input: xpad - rename QH controller to Legion Go S The QH controller is actually the controller of the Legion Go S, with the manufacturer string wch.cn and product name Legion Go S in its USB descriptor. A cursory lookup of the VID reveals the same. Therefore, rename the xpad entries to match. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-4-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 79e1c8f1f99c2..c33e6f33265ba 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -312,7 +312,7 @@ static const struct xpad_device { { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, - { 0x1a86, 0xe310, "QH Electronics Controller", 0, XTYPE_XBOX360 }, + { 0x1a86, 0xe310, "Legion Go S", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -538,7 +538,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ - XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */ + 
XPAD_XBOX360_VENDOR(0x1a86), /* Nanjing Qinheng Microelectronics (WCH) */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ From 68283c1cb573143c0b7515e93206f3503616bc10 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 7 Feb 2025 21:02:41 +0100 Subject: [PATCH 085/503] pinctrl: bcm281xx: Fix incorrect regmap max_registers value The max_registers value does not take into consideration the stride; currently, it's set to the number of the last pin, but this does not accurately represent the final register. Fix this by multiplying the current value by 4. Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") Signed-off-by: Artur Weber Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c index 73dbf29c002f3..cf6efa9c0364a 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c @@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = BCM281XX_PIN_VC_CAM3_SDA, + .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, }; static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) From acf40ab42799e4ae1397ee6f5c5941092d66f999 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Wed, 12 Feb 2025 18:05:32 +0800 Subject: [PATCH 086/503] pinctrl: nuvoton: npcm8xx: Add NULL check in npcm8xx_gpio_fw devm_kasprintf() calls can return null pointers on failure. But the return values were not checked in npcm8xx_gpio_fw(). Add NULL check in npcm8xx_gpio_fw(), to handle kernel NULL pointer dereference error. 
Fixes: acf4884a5717 ("pinctrl: nuvoton: add NPCM8XX pinctrl and GPIO driver") Signed-off-by: Charles Han Link: https://lore.kernel.org/20250212100532.4317-1-hanchunchao@inspur.com Signed-off-by: Linus Walleij --- drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index 471f644c5eef2..d09a5e9b2eca5 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.parent = dev; pctrl->gpio_bank[id].gc.fwnode = child; pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child); + if (pctrl->gpio_bank[id].gc.label == NULL) + return -ENOMEM; + pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show; pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input; pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input; From 7ff4faba63571c51004280f7eb5d6362b15ec61f Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Tue, 18 Feb 2025 08:31:44 +0800 Subject: [PATCH 087/503] pinctrl: spacemit: enable config option Pinctrl is an essential driver for SpacemiT's SoC. The uart driver requires it, same as the sd card driver, so let's enable it by default for this SoC. The CONFIG_PINCTRL_SPACEMIT_K1 option isn't enabled when using 'make defconfig' to select kernel configuration options. This results in a broken uart driver which fails at the probe() stage due to no pins being found.
Fixes: a83c29e1d145 ("pinctrl: spacemit: add support for SpacemiT K1 SoC") Reported-by: Alex Elder Acked-by: Conor Dooley Tested-by: Alex Elder Signed-off-by: Yixun Lan Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Link: https://lore.kernel.org/20250218-k1-pinctrl-option-v3-1-36e031e0da1b@gentoo.org Signed-off-by: Linus Walleij --- arch/riscv/Kconfig.socs | 1 + drivers/pinctrl/spacemit/Kconfig | 3 ++- drivers/pinctrl/spacemit/pinctrl-k1.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 1916cf7ba450e..17606940bb523 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -26,6 +26,7 @@ config ARCH_SOPHGO config ARCH_SPACEMIT bool "SpacemiT SoCs" + select PINCTRL help This enables support for SpacemiT SoC platform hardware. diff --git a/drivers/pinctrl/spacemit/Kconfig b/drivers/pinctrl/spacemit/Kconfig index 168f8a5ffbb95..a2f98b3f8a755 100644 --- a/drivers/pinctrl/spacemit/Kconfig +++ b/drivers/pinctrl/spacemit/Kconfig @@ -4,9 +4,10 @@ # config PINCTRL_SPACEMIT_K1 - tristate "SpacemiT K1 SoC Pinctrl driver" + bool "SpacemiT K1 SoC Pinctrl driver" depends on ARCH_SPACEMIT || COMPILE_TEST depends on OF + default y select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF diff --git a/drivers/pinctrl/spacemit/pinctrl-k1.c b/drivers/pinctrl/spacemit/pinctrl-k1.c index a32579d736130..59fd555ff38d4 100644 --- a/drivers/pinctrl/spacemit/pinctrl-k1.c +++ b/drivers/pinctrl/spacemit/pinctrl-k1.c @@ -1044,7 +1044,7 @@ static struct platform_driver k1_pinctrl_driver = { .of_match_table = k1_pinctrl_ids, }, }; -module_platform_driver(k1_pinctrl_driver); +builtin_platform_driver(k1_pinctrl_driver); MODULE_AUTHOR("Yixun Lan "); MODULE_DESCRIPTION("Pinctrl driver for the SpacemiT K1 SoC"); From 00817f0f1c45b007965f5676b9a2013bb39c7228 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Feb 2025 17:13:30 -0800 Subject: [PATCH 088/503] 
nvme-ioctl: fix leaked requests on mapping error All the callers assume nvme_map_user_request() frees the request on a failure. This wasn't happening on invalid metadata or io_uring command flags, so we've been leaking those requests. Fixes: 23fd22e55b767b ("nvme: wire up fixed buffer support for nvme passthrough") Fixes: 7c2fd76048e95d ("nvme: fix metadata handling in nvme-passthrough") Reviewed-by: Damien Le Moal Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b1b46c2713e1c..24e2c702da7a2 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -128,8 +128,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (!nvme_ctrl_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { - if (!supports_metadata) - return -EINVAL; + if (!supports_metadata) { + ret = -EINVAL; + goto out; + } if (!nvme_ctrl_meta_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked metadata buffer\n"); @@ -139,8 +141,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) - return -EINVAL; + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { + ret = -EINVAL; + goto out; + } ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); if (ret < 0) From 729d163232971672d0f41b93c02092fb91f0e758 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:22 +0100 Subject: [PATCH 089/503] Input: i8042 - swap old quirk combination with new quirk for NHxxRZQ Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. 
With the old i8042 quirks this device's keyboard is sometimes laggy after resume. With the new quirk this issue doesn't happen. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 127cfdc8668a0..a764248ea11d3 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1209,18 +1209,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { From 9ed468e17d5b80e7116fd35842df3648e808ae47 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:23 +0100 Subject: [PATCH 090/503] Input: i8042 - add required quirks for missing old boardnames Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. The PB71RD keyboard is sometimes laggy after resume and the PC70DR, PB51RF, P640RE, and PCX0DX_GN20 keyboards are sometimes unresponsive after resume. This quirk fixes that.
Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index a764248ea11d3..57a6eec4c2a5d 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1260,6 +1260,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "P640RE"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { /* * This is only a partial board_name and might be followed by @@ -1335,6 +1341,24 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB51RF"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB71RD"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PC70DR"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), @@ -1342,6 +1366,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PCX0DX_GN20"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */ { .matches = { From 75ee4ebebbbe8dc4b55ba37f388924fa96bf1564 Mon 
Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:24 +0100 Subject: [PATCH 091/503] Input: i8042 - swap old quirk combination with new quirk for several devices Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. While the old quirk combination did not show negative effects on these devices specifically, the new quirk works just as well and seems more stable in general. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 40 ++++++++++----------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 57a6eec4c2a5d..9df1ef6032dd0 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1080,16 +1080,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Mivvy M310 */ @@ -1171,8 +1169,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1205,8 
+1202,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1218,8 +1214,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, /* * At least one modern Clevo barebone has the touchpad connected both @@ -1235,17 +1230,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1319,8 +1312,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1338,8 +1330,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void 
*)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1363,8 +1354,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1383,15 +1373,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170SM"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* From d85862ccca452eeb19329e9f4f9a6ce1d1e53561 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:25 +0100 Subject: [PATCH 092/503] Input: i8042 - swap old quirk combination with new quirk for more devices Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. We could not actively retest these devices because we no longer have them in our archive, but based on the other old Clevo barebones we tested where the new quirk had the same or a better behaviour I think it would be good to apply it on these too.
Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-4-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 31 +++++++++------------------ 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 9df1ef6032dd0..6ed9fc34948cb 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1157,9 +1157,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues - * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, - * none of them have an external PS/2 port so this can safely be set for - * all of them. + * after suspend fixable with the forcenorestore quirk. * Clevo barebones come with board_vendor and/or system_vendor set to * either the very generic string "Notebook" and/or a different value * for each individual reseller. 
The only somewhat universal way to @@ -1175,22 +1173,19 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N140CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N141CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1250,8 +1245,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1268,16 +1262,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1288,8 +1280,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | 
SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1300,8 +1291,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1323,8 +1313,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { From 5a4041f2c47247575a6c2e53ce14f7b0ac946c33 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 19 Feb 2025 16:02:11 +0900 Subject: [PATCH 093/503] btrfs: zoned: fix extent range end unlock in cow_file_range() Running generic/751 on the for-next branch often results in a hang like below. They are both stack by locking an extent. This suggests someone forget to unlock an extent. INFO: task kworker/u128:1:12 blocked for more than 323 seconds. Not tainted 6.13.0-BTRFS-ZNS+ #503 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u128:1 state:D stack:0 pid:12 tgid:12 ppid:2 flags:0x00004000 Workqueue: btrfs-fixup btrfs_work_helper [btrfs] Call Trace: __schedule+0x534/0xdd0 schedule+0x39/0x140 __lock_extent+0x31b/0x380 [btrfs] ? __pfx_autoremove_wake_function+0x10/0x10 btrfs_writepage_fixup_worker+0xf1/0x3a0 [btrfs] btrfs_work_helper+0xff/0x480 [btrfs] ? lock_release+0x178/0x2c0 process_one_work+0x1ee/0x570 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d1/0x3b0 ? __pfx_worker_thread+0x10/0x10 kthread+0x10b/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x50 ? 
__pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 INFO: task kworker/u134:0:184 blocked for more than 323 seconds. Not tainted 6.13.0-BTRFS-ZNS+ #503 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u134:0 state:D stack:0 pid:184 tgid:184 ppid:2 flags:0x00004000 Workqueue: writeback wb_workfn (flush-btrfs-4) Call Trace: __schedule+0x534/0xdd0 schedule+0x39/0x140 __lock_extent+0x31b/0x380 [btrfs] ? __pfx_autoremove_wake_function+0x10/0x10 find_lock_delalloc_range+0xdb/0x260 [btrfs] writepage_delalloc+0x12f/0x500 [btrfs] ? srso_return_thunk+0x5/0x5f extent_write_cache_pages+0x232/0x840 [btrfs] btrfs_writepages+0x72/0x130 [btrfs] do_writepages+0xe7/0x260 ? srso_return_thunk+0x5/0x5f ? lock_acquire+0xd2/0x300 ? srso_return_thunk+0x5/0x5f ? find_held_lock+0x2b/0x80 ? wbc_attach_and_unlock_inode.part.0+0x102/0x250 ? wbc_attach_and_unlock_inode.part.0+0x102/0x250 __writeback_single_inode+0x5c/0x4b0 writeback_sb_inodes+0x22d/0x550 __writeback_inodes_wb+0x4c/0xe0 wb_writeback+0x2f6/0x3f0 wb_workfn+0x32a/0x510 process_one_work+0x1ee/0x570 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d1/0x3b0 ? __pfx_worker_thread+0x10/0x10 kthread+0x10b/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 This happens because we have another success path for the zoned mode. When there is no active zone available, btrfs_reserve_extent() returns -EAGAIN. In this case, we have two reactions. (1) If the given range is never allocated, we can only wait for someone to finish a zone, so wait on BTRFS_FS_NEED_ZONE_FINISH bit and retry afterward. (2) Or, if some allocations are already done, we must bail out and let the caller to send IOs for the allocation. This is because these IOs may be necessary to finish a zone. The commit 06f364284794 ("btrfs: do proper folio cleanup when cow_file_range() failed") moved the unlock code from the inside of the loop to the outside. 
So, previously, the allocated extents were unlocked just after the allocation, and thus before returning from the function. However, they are no longer unlocked in case (2) above. That caused the hang issue. Fix the issue by modifying the 'end' to the end of the allocated range. Then, we can exit the loop and the same unlock code can properly handle the case. Reported-by: Shin'ichiro Kawasaki Tested-by: Johannes Thumshirn Fixes: 06f364284794 ("btrfs: do proper folio cleanup when cow_file_range() failed") CC: stable@vger.kernel.org Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fe2c810335ff0..69e90ed33cfe5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1382,8 +1382,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode, continue; } if (done_offset) { - *done_offset = start - 1; - return 0; + /* + * Move @end to the end of the processed range, + * and exit the loop to unlock the processed extents. + */ + end = start - 1; + ret = 0; + break; } ret = -ENOSPC; } From 2df2c6ed89600a02e1c7a581a6a55e53c38ce0f5 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 25 Feb 2025 20:26:14 +0100 Subject: [PATCH 094/503] btrfs: replace deprecated strncpy() with strscpy() strncpy() is deprecated for NUL-terminated destination buffers. Use strscpy() instead and don't zero-initialize the param array.
Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Signed-off-by: Thorsten Blum Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 53b846d99ecea..14f53f7575553 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1330,13 +1330,13 @@ MODULE_PARM_DESC(read_policy, int btrfs_read_policy_to_enum(const char *str, s64 *value_ret) { - char param[32] = { 0 }; + char param[32]; char __maybe_unused *value_str; if (!str || strlen(str) == 0) return 0; - strncpy(param, str, sizeof(param) - 1); + strscpy(param, str); #ifdef CONFIG_BTRFS_EXPERIMENTAL /* Separate value from input in policy:value format. */ From 99ca2c28e6b68084a0fb65585df09b9e28c3ec16 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2025 11:24:50 +0100 Subject: [PATCH 095/503] wifi: mac80211: fix MLE non-inheritance parsing The code is erroneously applying the non-inheritance element to the inner elements rather than the outer, which is clearly completely wrong. Fix it by finding the MLE basic element at the beginning, and then applying the non-inheritance for the outer parsing. While at it, do some general cleanups such as not allowing callers to try looking for a specific non-transmitted BSS and link at the same time. 
Fixes: 45ebac4f059b ("wifi: mac80211: Parse station profile from association response") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250221112451.b46d42f45b66.If5b95dc3c80208e0c62d8895fb6152aa54b6620b@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 + net/mac80211/parse.c | 127 ++++++++++++++++++++++++++++--------------- 2 files changed, 83 insertions(+), 45 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f8d52b3b0d0e4..36a9be9a66c8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4959,6 +4959,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, parse_params.start = bss_ies->data; parse_params.len = bss_ies->len; parse_params.bss = cbss; + parse_params.link_id = -1; bss_elems = ieee802_11_parse_elems_full(&parse_params); if (!bss_elems) { ret = false; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index cd318c1c67bec..3d5d6658fe8d5 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -47,6 +47,8 @@ struct ieee80211_elems_parse { /* The EPCS Multi-Link element in the original elements */ const struct element *ml_epcs_elem; + bool multi_link_inner; + /* * scratch buffer that can be used for various element parsing related * tasks, e.g., element de-fragmentation etc. 
@@ -152,12 +154,11 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems_parse->ml_basic_elem) { + if (elems_parse->multi_link_inner) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } - elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems_parse->ml_reconf_elem = elem; @@ -866,21 +867,36 @@ ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, } } -static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, - struct ieee80211_elems_parse_params *params) +static const struct element * +ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elems_parse_params *params, + struct ieee80211_elems_parse_params *sub) { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .action = params->action, - .from_ap = params->from_ap, - .link_id = -1, - }; - ssize_t ml_len = elems->ml_basic_len; - const struct element *non_inherit = NULL; + const struct element *tmp; + ssize_t ml_len; const u8 *end; + if (params->mode < IEEE80211_CONN_MODE_EHT) + return NULL; + + for_each_element_extid(tmp, WLAN_EID_EXT_EHT_MULTI_LINK, + elems->ie_start, elems->total_len) { + const struct ieee80211_multi_link_elem *mle = + (void *)tmp->data + 1; + + if (!ieee80211_mle_size_ok(tmp->data + 1, tmp->datalen - 1)) + continue; + + if (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + continue; + + elems_parse->ml_basic_elem = tmp; + break; + } + ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, @@ -891,26 +907,26 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; + return NULL; 
elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; elems_parse->scratch_pos += ml_len; if (params->link_id == -1) - return; + return NULL; ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) - return; + return NULL; /* check if we have the 4 bytes for the fixed part in assoc response */ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { elems->prof = NULL; elems->sta_prof_len = 0; - return; + return NULL; } /* @@ -919,13 +935,17 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, * the -1 is because the 'sta_info_len' is accounted to as part of the * per-STA profile, but not part of the 'u8 variable[]' portion. */ - sub.start = prof->variable + prof->sta_info_len - 1 + 4; + sub->start = prof->variable + prof->sta_info_len - 1 + 4; end = (const u8 *)prof + elems->sta_prof_len; - sub.len = end - sub.start; + sub->len = end - sub->start; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); + sub->mode = params->mode; + sub->action = params->action; + sub->from_ap = params->from_ap; + sub->link_id = -1; + + return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); } static void @@ -973,15 +993,19 @@ ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { + struct ieee80211_elems_parse_params sub = {}; struct ieee80211_elems_parse *elems_parse; - struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; - u8 *nontransmitted_profile; - int nontransmitted_profile_len = 0; + struct ieee802_11_elems *elems; size_t scratch_len = 3 * params->len; + bool multi_link_inner = false; BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); + /* cannot parse for both a specific link and non-transmitted 
BSS */ + if (WARN_ON(params->link_id >= 0 && params->bss)) + return NULL; + elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), GFP_ATOMIC); if (!elems_parse) @@ -998,34 +1022,47 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) ieee80211_clear_tpe(&elems->tpe); ieee80211_clear_tpe(&elems->csa_tpe); - nontransmitted_profile = elems_parse->scratch_pos; - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - elems_parse->scratch_pos += nontransmitted_profile_len; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); + /* + * If we're looking for a non-transmitted BSS then we cannot at + * the same time be looking for a second link as the two can only + * appear in the same frame carrying info for different BSSes. + * + * In any case, we only look for one at a time, as encoded by + * the WARN_ON above. 
+ */ + if (params->bss) { + int nontx_len = + ieee802_11_find_bssid_profile(params->start, + params->len, + elems, params->bss, + elems_parse->scratch_pos); + sub.start = elems_parse->scratch_pos; + sub.mode = params->mode; + sub.len = nontx_len; + sub.action = params->action; + sub.link_id = params->link_id; + + /* consume the space used for non-transmitted profile */ + elems_parse->scratch_pos += nontx_len; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, nontx_len); + } else { + /* must always parse to get elems_parse->ml_basic_elem */ + non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, + &sub); + multi_link_inner = true; + } elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); - /* Override with nontransmitted profile, if found */ - if (nontransmitted_profile_len) { - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .start = nontransmitted_profile, - .len = nontransmitted_profile_len, - .action = params->action, - .link_id = params->link_id, - }; - + /* Override with nontransmitted/per-STA profile if found */ + if (sub.len) { + elems_parse->multi_link_inner = multi_link_inner; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_parse_link(elems_parse, params); - ieee80211_mle_defrag_reconf(elems_parse); ieee80211_mle_defrag_epcs(elems_parse); From 130067e9c13bdc4820748ef16076a6972364745f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2025 11:24:51 +0100 Subject: [PATCH 096/503] wifi: mac80211: fix vendor-specific inheritance If there's any vendor-specific element in the subelements then the outer element parsing must not parse any vendor element at all. This isn't implemented correctly now due to parsing into the pointers and then overriding them, so explicitly skip vendor elements if any exist in the sub- elements (non-transmitted profile or per-STA profile). 
Fixes: 671042a4fb77 ("mac80211: support non-inheritance element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250221112451.fd71e5268840.I9db3e6a3367e6ff38d052d07dc07005f0dd3bd5c@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 3d5d6658fe8d5..6da39c864f45b 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -48,6 +48,7 @@ struct ieee80211_elems_parse { const struct element *ml_epcs_elem; bool multi_link_inner; + bool skip_vendor; /* * scratch buffer that can be used for various element parsing related @@ -400,6 +401,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: + if (elems_parse->skip_vendor) + break; + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { /* Microsoft OUI (00:50:F2) */ @@ -1054,12 +1058,16 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) multi_link_inner = true; } + elems_parse->skip_vendor = + cfg80211_find_elem(WLAN_EID_VENDOR_SPECIFIC, + sub.start, sub.len); elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); /* Override with nontransmitted/per-STA profile if found */ if (sub.len) { elems_parse->multi_link_inner = multi_link_inner; + elems_parse->skip_vendor = false; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } From 861d0445e72e9e33797f2ceef882c74decb16a87 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Thu, 13 Feb 2025 22:43:30 +0100 Subject: [PATCH 097/503] wifi: mac80211: Fix sparse warning for monitor_sdata Use rcu_access_pointer() to avoid sparse warning in drv_remove_interface(). 
Signed-off-by: Alexander Wetzel Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502130534.bVrZZBK0-lkp@intel.com/ Fixes: 646262c71aca ("wifi: mac80211: remove debugfs dir for virtual monitor") Link: https://patch.msgid.link/20250213214330.6113-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 2fc60e1e77a55..35349a7f16cb4 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -121,7 +121,7 @@ void drv_remove_interface(struct ieee80211_local *local, * The virtual monitor interface doesn't get a debugfs * entry, so it's exempt here. */ - if (sdata != local->monitor_sdata) + if (sdata != rcu_access_pointer(local->monitor_sdata)) ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); From 8c3170628a9ce24a59647bd24f897e666af919b8 Mon Sep 17 00:00:00 2001 From: Matthias Proske Date: Wed, 12 Feb 2025 19:59:35 +0100 Subject: [PATCH 098/503] wifi: brcmfmac: keep power during suspend if board requires it After commit 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used"), the wifi adapter by default is turned off on suspend and then re-probed on resume. This conflicts with some embedded boards that require to remain powered. They will fail on resume with: brcmfmac: brcmf_sdio_bus_rxctl: resumed on timeout ieee80211 phy1: brcmf_bus_started: failed: -110 ieee80211 phy1: brcmf_attach: dongle is not responding: err=-110 brcmfmac: brcmf_sdio_firmware_callback: brcmf_attach failed This commit checks for the Device Tree property 'cap-power-off-cards'. If this property is not set, it means that we do not have the capability to power off and should therefore remain powered. 
Signed-off-by: Matthias Proske Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250212185941.146958-2-email@matthias-proske.de Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/bcmsdh.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 60eb95fc19a5a..6bc107476a2a3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1172,6 +1172,7 @@ static int brcmf_ops_sdio_suspend(struct device *dev) struct brcmf_bus *bus_if; struct brcmf_sdio_dev *sdiodev; mmc_pm_flag_t sdio_flags; + bool cap_power_off; int ret = 0; func = container_of(dev, struct sdio_func, dev); @@ -1179,19 +1180,23 @@ static int brcmf_ops_sdio_suspend(struct device *dev) if (func->num != 1) return 0; + cap_power_off = !!(func->card->host->caps & MMC_CAP_POWER_OFF_CARD); bus_if = dev_get_drvdata(dev); sdiodev = bus_if->bus_priv.sdio; - if (sdiodev->wowl_enabled) { + if (sdiodev->wowl_enabled || !cap_power_off) { brcmf_sdiod_freezer_on(sdiodev); brcmf_sdio_wd_timer(sdiodev->bus, 0); sdio_flags = MMC_PM_KEEP_POWER; - if (sdiodev->settings->bus.sdio.oob_irq_supported) - enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); - else - sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + + if (sdiodev->wowl_enabled) { + if (sdiodev->settings->bus.sdio.oob_irq_supported) + enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); + else + sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + } if (sdio_set_host_pm_flags(sdiodev->func1, sdio_flags)) brcmf_err("Failed to set pm_flags %x\n", sdio_flags); @@ -1213,18 +1218,19 @@ static int brcmf_ops_sdio_resume(struct device *dev) struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; struct sdio_func *func = container_of(dev, struct sdio_func, dev); int ret = 0; + bool cap_power_off = !!(func->card->host->caps & 
MMC_CAP_POWER_OFF_CARD); brcmf_dbg(SDIO, "Enter: F%d\n", func->num); if (func->num != 2) return 0; - if (!sdiodev->wowl_enabled) { + if (!sdiodev->wowl_enabled && cap_power_off) { /* bus was powered off and device removed, probe again */ ret = brcmf_sdiod_probe(sdiodev); if (ret) brcmf_err("Failed to probe device on resume\n"); } else { - if (sdiodev->settings->bus.sdio.oob_irq_supported) + if (sdiodev->wowl_enabled && sdiodev->settings->bus.sdio.oob_irq_supported) disable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); brcmf_sdiod_freezer_off(sdiodev); From e4cf8ec4de4e13f156c1d61977d282d90c221085 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Feb 2025 08:14:43 +0000 Subject: [PATCH 099/503] affs: generate OFS sequence numbers starting at 1 If I write a file to an OFS floppy image, and try to read it back on an emulated Amiga running Workbench 1.3, the Amiga reports a disk error trying to read the file. (That is, it's unable to read it _at all_, even to copy it to the NIL: device. It isn't a matter of getting the wrong data and being unable to parse the file format.) This is because the 'sequence number' field in the OFS data block header is supposed to be based at 1, but affs writes it based at 0. All three locations changed by this patch were setting the sequence number to a variable 'bidx' which was previously obtained by dividing a file position by bsize, so bidx will naturally use 0 for the first block. Therefore all three should add 1 to that value before writing it into the sequence number field. With this change, the Amiga successfully reads the file. 
For data block reference: https://wiki.osdev.org/FFS_(Amiga) Signed-off-by: Simon Tatham Signed-off-by: David Sterba --- fs/affs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index a5a861dd52230..226308f8627e7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -596,7 +596,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_fix_checksum(sb, bh); bh->b_state &= ~(1UL << BH_New); @@ -746,7 +746,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(bsize); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); @@ -780,7 +780,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); From 011ea742a25a77bac3d995f457886a67d178c6f0 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Feb 2025 08:14:44 +0000 Subject: [PATCH 100/503] affs: don't write overlarge OFS data block size fields If a data sector on an OFS floppy contains a value > 0x1e8 (the largest amount of data that fits in the sector after its header), then an Amiga reading the file can return 
corrupt data, by taking the overlarge size at its word and reading past the end of the buffer it read the disk sector into! The cause: when affs_write_end_ofs() writes data to an OFS filesystem, the new size field for a data block was computed by adding the amount of data currently being written (into the block) to the existing value of the size field. This is correct if you're extending the file at the end, but if you seek backwards in the file and overwrite _existing_ data, it can lead to the size field being larger than the maximum legal value. This commit changes the calculation so that it sets the size field to the max of its previous size and the position within the block that we just wrote up to. Signed-off-by: Simon Tatham Signed-off-by: David Sterba --- fs/affs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index 226308f8627e7..7a71018e3f675 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -724,7 +724,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); - be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); + AFFS_DATA_HEAD(bh)->size = cpu_to_be32( + max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size))); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); written += tmp; From 75f1f311d883dfaffb98be3c1da208d6ed5d4df9 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Feb 2025 13:38:19 -0600 Subject: [PATCH 101/503] Revert "of: reserved-memory: Fix using wrong number of cells to get property 'alignment'" This reverts commit 267b21d0bef8e67dbe6c591c9991444e58237ec9. Turns out some DTs do depend on this behavior. Specifically, a downstream Pixel 6 DT. Revert the change at least until we can decide if the DT spec can be changed instead. 
Cc: stable@vger.kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_reserved_mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 75e819f66a561..ee2e31522d7ef 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -415,12 +415,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam prop = of_get_flat_dt_prop(node, "alignment", &len); if (prop) { - if (len != dt_root_size_cells * sizeof(__be32)) { + if (len != dt_root_addr_cells * sizeof(__be32)) { pr_err("invalid alignment property in '%s' node.\n", uname); return -EINVAL; } - align = dt_mem_next_cell(dt_root_size_cells, &prop); + align = dt_mem_next_cell(dt_root_addr_cells, &prop); } nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; From b8501febdc513541afc5663d063bfac7ea575b71 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Jan 2025 16:45:19 +0100 Subject: [PATCH 102/503] clk: qcom: dispcc-sm8750: Drop incorrect CLK_SET_RATE_PARENT on byte intf parent The parent of disp_cc_mdss_byte0_intf_clk clock should not propagate up the rates, because this messes up entire clock hierarchy when setting clock rates in MSM DSI driver. The dsi_link_clk_set_rate_6g() first sets entire clock hierarchy rates via dev_pm_opp_set_rate() on byte clock and then sets individual clock rates, like pixel and byte_intf clocks, to proper frequencies. Having CLK_SET_RATE_PARENT caused that entire tree was re-calced and the byte clock received halved frequency. Drop CLK_SET_RATE_PARENT to fix this and align with SM8550 and SM8650. 
Fixes: f1080d8dab0f ("clk: qcom: dispcc-sm8750: Add SM8750 Display clock controller") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250129154519.209791-1-krzysztof.kozlowski@linaro.org Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sm8750.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sm8750.c b/drivers/clk/qcom/dispcc-sm8750.c index 0358dff91da5d..e9bca179998b9 100644 --- a/drivers/clk/qcom/dispcc-sm8750.c +++ b/drivers/clk/qcom/dispcc-sm8750.c @@ -827,7 +827,6 @@ static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = { &disp_cc_mdss_byte0_clk_src.clkr.hw, }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_regmap_div_ops, }, }; @@ -842,7 +841,6 @@ static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = { &disp_cc_mdss_byte1_clk_src.clkr.hw, }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_regmap_div_ops, }, }; From ac965d7d88fc36fb42e3d50225c0a44dd8326da4 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:18:46 +0900 Subject: [PATCH 103/503] tracing: tprobe-events: Fix a memory leak when tprobe with $retval Fix a memory leak when a tprobe is defined with $retval. This combination is not allowed, but the parse_symbol_and_return() does not free the *symbol which should not be used if it returns the error. Thus, it leaks the *symbol memory in that error path. 
Link: https://lore.kernel.org/all/174055072650.4079315.3063014346697447838.stgit@mhiramat.tok.corp.google.com/ Fixes: ce51e6153f77 ("tracing: fprobe-event: Fix to check tracepoint event and return") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace_fprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index b8f3c4ba309b6..8826f44f69a44 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1056,6 +1056,8 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (is_tracepoint) { trace_probe_log_set_index(i); trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + kfree(*symbol); + *symbol = NULL; return -EINVAL; } *is_return = true; From d0453655b6ddc685a4837f3cc0776ae8eef62d01 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:18:54 +0900 Subject: [PATCH 104/503] tracing: tprobe-events: Reject invalid tracepoint name Commit 57a7e6de9e30 ("tracing/fprobe: Support raw tracepoints on future loaded modules") allows the user to set a tprobe on a non-existent tracepoint, but it does not check that the tracepoint name is acceptable. As a result, a tprobe event name can contain an invalid character (e.g. when given with a subsystem prefix). In this case, the event is not shown in the events directory. Reject such invalid tracepoint names. The tracepoint name must consist only of letters, digits, or '_'.
Link: https://lore.kernel.org/all/174055073461.4079315.15875502830565214255.stgit@mhiramat.tok.corp.google.com/ Fixes: 57a7e6de9e30 ("tracing/fprobe: Support raw tracepoints on future loaded modules") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace_fprobe.c | 13 +++++++++++++ kernel/trace/trace_probe.h | 1 + 2 files changed, 14 insertions(+) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8826f44f69a44..85f037dc14623 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1049,6 +1049,19 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (*is_return) return 0; + if (is_tracepoint) { + tmp = *symbol; + while (*tmp && (isalnum(*tmp) || *tmp == '_')) + tmp++; + if (*tmp) { + /* find a wrong character. */ + trace_probe_log_err(tmp - *symbol, BAD_TP_NAME); + kfree(*symbol); + *symbol = NULL; + return -EINVAL; + } + } + /* If there is $retval, this should be a return fprobe. 
*/ for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5803e6a415705..fba3ede870541 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -481,6 +481,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NON_UNIQ_SYMBOL, "The symbol is not unique"), \ C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ C(NO_TRACEPOINT, "Tracepoint is not found"), \ + C(BAD_TP_NAME, "Invalid character in tracepoint name"),\ C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \ C(NO_GROUP_NAME, "Group name is not specified"), \ C(GROUP_TOO_LONG, "Group name is too long"), \ From db5e228611b118cf7b1f8084063feda5c037f4a7 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:19:02 +0900 Subject: [PATCH 105/503] tracing: fprobe-events: Log error for exceeding the number of entry args Add error message when the number of entry argument exceeds the maximum size of entry data. This is currently checked when registering fprobe, but in this case no error message is shown in the error_log file. 
Link: https://lore.kernel.org/all/174055074269.4079315.17809232650360988538.stgit@mhiramat.tok.corp.google.com/ Fixes: 25f00e40ce79 ("tracing/probes: Support $argN in return probe (kprobe and fprobe)") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_fprobe.c | 5 +++++ kernel/trace/trace_probe.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 85f037dc14623..e27305d31fc57 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1230,6 +1230,11 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_return && tf->tp.entry_arg) { tf->fp.entry_handler = trace_fprobe_entry_handler; tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp); + if (ALIGN(tf->fp.entry_data_size, sizeof(long)) > MAX_FPROBE_DATA_SIZE) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_EARGS); + return -E2BIG; + } } ret = traceprobe_set_print_fmt(&tf->tp, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fba3ede870541..c47ca002347a7 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -545,7 +545,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_BTF_FIELD, "This field is not found."), \ C(BAD_BTF_TID, "Failed to get BTF type info."),\ C(BAD_TYPE4STR, "This type does not fit for string."),\ - C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ + C(TOO_MANY_EARGS, "Too many entry arguments specified"), #undef C #define C(a, b) TP_ERR_##a From 6b481ab0e6855fb30e2923c51f62f1662d1cda7e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Feb 2025 11:25:31 +1000 Subject: [PATCH 106/503] drm/nouveau: select FW caching nouveau tries to load some firmware during suspend that it loaded earlier, but with fw 
caching disabled it hangs suspend, so just rely on FW cache enabling instead of working around it in the driver. Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250207012531.621369-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index ce840300578d8..1050a4617fc15 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -4,6 +4,7 @@ config DRM_NOUVEAU depends on DRM && PCI && MMU select IOMMU_API select FW_LOADER + select FW_CACHE if PM_SLEEP select DRM_CLIENT_SELECTION select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER From c133ec0e5717868c9967fa3df92a55e537b1aead Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 25 Feb 2025 11:59:27 +0200 Subject: [PATCH 107/503] usb: xhci: Enable the TRB overfetch quirk on VIA VL805 Raspberry Pi is a major user of those chips and they discovered a bug - when the end of a transfer ring segment is reached, up to four TRBs can be prefetched from the next page even if the segment ends with link TRB and on page boundary (the chip claims to support standard 4KB pages). It also appears that if the prefetched TRBs belong to a different ring whose doorbell is later rung, they may be used without refreshing from system RAM and the endpoint will stay idle if their cycle bit is stale. Other users complain about IOMMU faults on x86 systems, unsurprisingly. Deal with it by using existing quirk which allocates a dummy page after each transfer ring segment. This was seen to resolve both problems. RPi came up with a more efficient solution, shortening each segment by four TRBs, but it complicated the driver and they ditched it for this quirk. Also rename the quirk and add VL805 device ID macro. 
Signed-off-by: Michal Pecio Link: https://github.com/raspberrypi/linux/issues/4685 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=215906 CC: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250225095927.2512358-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 3 ++- drivers/usb/host/xhci-pci.c | 10 +++++++--- drivers/usb/host/xhci.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 92703efda1f7b..fdf0c1008225a 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2437,7 +2437,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * and our use of dma addresses in the trb_address_map radix tree needs * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need. */ - if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH) + if (xhci->quirks & XHCI_TRB_OVERFETCH) + /* Buggy HC prefetches beyond segment bounds - allocate dummy space at the end */ xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2); else diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ad0ff356f6fa0..54460d11f7ee8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -38,6 +38,8 @@ #define PCI_DEVICE_ID_ETRON_EJ168 0x7023 #define PCI_DEVICE_ID_ETRON_EJ188 0x7052 +#define PCI_DEVICE_ID_VIA_VL805 0x3483 + #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI 0x9cb1 @@ -418,8 +420,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x3432) xhci->quirks |= XHCI_BROKEN_STREAMS; - if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) + if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == PCI_DEVICE_ID_VIA_VL805) { xhci->quirks |= 
XHCI_LPM_SUPPORT; + xhci->quirks |= XHCI_TRB_OVERFETCH; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) { @@ -467,11 +471,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->device == 0x9203) - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->vendor == PCI_VENDOR_ID_CDNS && diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8c164340a2c35..779b01dee068f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1632,7 +1632,7 @@ struct xhci_hcd { #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) -#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) +#define XHCI_TRB_OVERFETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) From 2397d61ee45cddb8f3bd3a3a9840ef0f0b5aa843 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Mon, 24 Feb 2025 09:32:59 +0100 Subject: [PATCH 108/503] Revert "drivers/card_reader/rtsx_usb: Restore interrupt based detection" This reverts commit 235b630eda072d7e7b102ab346d6b8a2c028a772. This commit was found responsible for issues with SD card recognition, as users had to re-insert their cards in the readers and wait for a while. As for some people the SD card was involved in the boot process it also caused boot failures. 
Cc: stable@vger.kernel.org Link: https://bbs.archlinux.org/viewtopic.php?id=303321 Fixes: 235b630eda07 ("drivers/card_reader/rtsx_usb: Restore interrupt based detection") Reported-by: qf Closes: https://lore.kernel.org/all/1de87dfa-1e81-45b7-8dcb-ad86c21d5352@heusel.eu Signed-off-by: Christian Heusel Link: https://lore.kernel.org/r/20250224-revert-sdcard-patch-v1-1-d1a457fbb796@heusel.eu Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_usb.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index e0174da5e9fc3..77b0490a1b38d 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -286,7 +286,6 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; - u8 interrupt_val = 0; u16 *buf; if (!status) @@ -309,20 +308,6 @@ int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) ret = rtsx_usb_get_status_with_bulk(ucr, status); } - rtsx_usb_read_register(ucr, CARD_INT_PEND, &interrupt_val); - /* Cross check presence with interrupts */ - if (*status & XD_CD) - if (!(interrupt_val & XD_INT)) - *status &= ~XD_CD; - - if (*status & SD_CD) - if (!(interrupt_val & SD_INT)) - *status &= ~SD_CD; - - if (*status & MS_CD) - if (!(interrupt_val & MS_INT)) - *status &= ~MS_CD; - /* usb_control_msg may return positive when success */ if (ret < 0) return ret; From cbf85b9cb80bec6345ffe0368dfff98386f4714f Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Fri, 21 Feb 2025 22:32:59 +0100 Subject: [PATCH 109/503] bluetooth: btusb: Initialize .owner field of force_poll_sync_fops Initialize .owner field of force_poll_sync_fops to THIS_MODULE in order to prevent btusb from being unloaded while its operations are in use. 
Fixes: 800fe5ec302e ("Bluetooth: btusb: Add support for queuing during polling interval") Signed-off-by: Salah Triki Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8149e53fd0a76..2a8d91963c63f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3644,6 +3644,7 @@ static ssize_t force_poll_sync_write(struct file *file, } static const struct file_operations force_poll_sync_fops = { + .owner = THIS_MODULE, .open = simple_open, .read = force_poll_sync_read, .write = force_poll_sync_write, From f2176a07e7b19f73e05c805cf3d130a2999154cb Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 21 Feb 2025 16:49:47 +0800 Subject: [PATCH 110/503] Bluetooth: Add check for mgmt_alloc_skb() in mgmt_remote_name() Add check for the return value of mgmt_alloc_skb() in mgmt_remote_name() to prevent null pointer dereference. Fixes: ba17bb62ce41 ("Bluetooth: Fix skb allocation in mgmt_remote_name() & mgmt_device_connected()") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f53304cb09dbe..3e0f88cd975c8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -10413,6 +10413,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + (name ? 
eir_precalc_len(name_len) : 0)); + if (!skb) + return; ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); From d8df010f72b8a32aaea393e36121738bb53ed905 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 21 Feb 2025 16:58:01 +0800 Subject: [PATCH 111/503] Bluetooth: Add check for mgmt_alloc_skb() in mgmt_device_connected() Add check for the return value of mgmt_alloc_skb() in mgmt_device_connected() to prevent null pointer dereference. Fixes: e96741437ef0 ("Bluetooth: mgmt: Make use of mgmt_send_event_skb in MGMT_EV_DEVICE_CONNECTED") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3e0f88cd975c8..621c555f639be 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9660,6 +9660,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + eir_precalc_len(sizeof(conn->dev_class))); + if (!skb) + return; + ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); From 0979ff3676b1b4e6a20970bc265491d23c2da42b Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 27 Feb 2025 20:00:05 +0100 Subject: [PATCH 112/503] nvmet: remove old function prototype nvmet_subsys_nsid_exists() doesn't exist anymore Fixes: 74d16965d7ac ("nvmet-loop: avoid using mutex in IO hotpath") Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d2c1233981e1a..fcf4f460dc9a4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -647,7 +647,6 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_host *host); void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 
event_type, u8 event_info, u8 log_page); -bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid); #define NVMET_MIN_QUEUE_SIZE 16 #define NVMET_MAX_QUEUE_SIZE 1024 From afb41b08c44e5386f2f52fa859010ac4afd2b66f Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 24 Feb 2025 15:40:58 +0100 Subject: [PATCH 113/503] nvme-tcp: Fix a C2HTermReq error message In H2CTermReq, a FES with value 0x05 means "R2T Limit Exceeded"; but in C2HTermReq the same value has a different meaning (Data Transfer Limit Exceeded). Fixes: 84e009042d0f ("nvme-tcp: add basic support for the C2HTermReq PDU") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8c14018201dbd..1094cbbec169f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -775,7 +775,7 @@ static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", - [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", }; From ada9ce437a4da8e27243251bd7a9ecec32ebd72a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 27 Feb 2025 02:21:26 +0100 Subject: [PATCH 114/503] mailmap: remove unwanted entry for Antonio Quartulli antonio@openvpn.net is still used for sending patches under the OpenVPN Inc. umbrella, therefore this address should not be re-mapped. 
Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250227-b4-ovpn-v20-1-93f363310834@openvpn.net Signed-off-by: Jakub Kicinski --- .mailmap | 1 - 1 file changed, 1 deletion(-) diff --git a/.mailmap b/.mailmap index a897c16d3baef..598f31c4b498e 100644 --- a/.mailmap +++ b/.mailmap @@ -88,7 +88,6 @@ Antonio Quartulli Antonio Quartulli Antonio Quartulli Antonio Quartulli -Antonio Quartulli Antonio Quartulli Anup Patel Archit Taneja From ad95bab0cd28ed77c2c0d0b6e76e03e031391064 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 26 Feb 2025 14:42:18 +0100 Subject: [PATCH 115/503] nvme-tcp: fix potential memory corruption in nvme_tcp_recv_pdu() nvme_tcp_recv_pdu() doesn't check the validity of the header length. When header digests are enabled, a target might send a packet with an invalid header length (e.g. 255), causing nvme_tcp_verify_hdgst() to access memory outside the allocated area and cause memory corruptions by overwriting it with the calculated digest. Fix this by rejecting packets with an unexpected header length. 
Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1094cbbec169f..23f11527d29d0 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -217,6 +217,19 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) +{ + switch (type) { + case nvme_tcp_c2h_term: + case nvme_tcp_c2h_data: + case nvme_tcp_r2t: + case nvme_tcp_rsp: + return true; + default: + return false; + } +} + /* * Check if the queue is TLS encrypted */ @@ -818,6 +831,16 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { + if (!nvme_tcp_recv_pdu_supported(hdr->type)) + goto unsupported_pdu; + + dev_err(queue->ctrl->ctrl.device, + "pdu type %d has unexpected header length (%d)\n", + hdr->type, hdr->hlen); + return -EPROTO; + } + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { /* * C2HTermReq never includes Header or Data digests. 
@@ -850,10 +873,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_init_recv_ctx(queue); return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); default: - dev_err(queue->ctrl->ctrl.device, - "unsupported pdu type (%d)\n", hdr->type); - return -EINVAL; + goto unsupported_pdu; } + +unsupported_pdu: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; } static inline void nvme_tcp_end_request(struct request *rq, u16 status) From a16f88964c647103dad7743a484b216d488a6352 Mon Sep 17 00:00:00 2001 From: Meir Elisha Date: Wed, 26 Feb 2025 09:28:12 +0200 Subject: [PATCH 116/503] nvmet-tcp: Fix a possible sporadic response drops in weakly ordered arch The order in which queue->cmd and rcv_state are updated is crucial. If these assignments are reordered by the compiler, the worker might not get queued in nvmet_tcp_queue_response(), hanging the IO. To enforce the correct ordering, set rcv_state using smp_store_release().
Fixes: bdaf13279192 ("nvmet-tcp: fix a segmentation fault during io parsing error") Signed-off-by: Meir Elisha Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109a..4f9cac8a5abe0 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -571,10 +571,16 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; + enum nvmet_tcp_recv_state queue_state; + struct nvmet_tcp_cmd *queue_cmd; struct nvme_sgl_desc *sgl; u32 len; - if (unlikely(cmd == queue->cmd)) { + /* Pairs with store_release in nvmet_prepare_receive_pdu() */ + queue_state = smp_load_acquire(&queue->rcv_state); + queue_cmd = READ_ONCE(queue->cmd); + + if (unlikely(cmd == queue_cmd)) { sgl = &cmd->req.cmd->common.dptr.sgl; len = le32_to_cpu(sgl->length); @@ -583,7 +589,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) * Avoid using helpers, this might happen before * nvmet_req_init is completed. 
*/ - if (queue->rcv_state == NVMET_TCP_RECV_PDU && + if (queue_state == NVMET_TCP_RECV_PDU && len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; @@ -847,8 +853,9 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); - queue->cmd = NULL; - queue->rcv_state = NVMET_TCP_RECV_PDU; + WRITE_ONCE(queue->cmd, NULL); + /* Ensure rcv_state is visible only after queue->cmd is set */ + smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) From 00371a3f48775967950c2fe3ec97b7c786ca956d Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 26 Feb 2025 09:52:05 +0100 Subject: [PATCH 117/503] stmmac: loongson: Pass correct arg to PCI function pcim_iomap_regions() should receive the driver's name as its third parameter, not the PCI device's name. Define the driver name with a macro and use it at the appropriate places, including pcim_iomap_regions(). 
Cc: stable@vger.kernel.org # v5.14+ Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Philipp Stanner Reviewed-by: Andrew Lunn Reviewed-by: Yanteng Si Tested-by: Henry Chen Link: https://patch.msgid.link/20250226085208.97891-2-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index f5acfb7d4ff65..ab7c2750c1042 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -11,6 +11,8 @@ #include "dwmac_dma.h" #include "dwmac1000.h" +#define DRIVER_NAME "dwmac-loongson-pci" + /* Normal Loongson Tx Summary */ #define DMA_INTR_ENA_NIE_TX_LOONGSON 0x00040000 /* Normal Loongson Rx Summary */ @@ -568,7 +570,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (pci_resource_len(pdev, i) == 0) continue; - ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + ret = pcim_iomap_regions(pdev, BIT(0), DRIVER_NAME); if (ret) goto err_disable_device; break; @@ -687,7 +689,7 @@ static const struct pci_device_id loongson_dwmac_id_table[] = { MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); static struct pci_driver loongson_dwmac_driver = { - .name = "dwmac-loongson-pci", + .name = DRIVER_NAME, .id_table = loongson_dwmac_id_table, .probe = loongson_dwmac_probe, .remove = loongson_dwmac_remove, From c72e455b89f216b43cd0dbb518036ec4c98f5c46 Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Tue, 25 Feb 2025 22:01:02 +0000 Subject: [PATCH 118/503] leds: leds-st1202: Fix NULL pointer access on race condition st1202_dt_init() calls devm_led_classdev_register_ext() before the internal data structures are properly set up, so the LEDs become visible to user space while being 
partially initialized, leading to a window where trying to access them causes a NULL pointer access. Move devm_led_classdev_register_ext() from DT initialization to the end of the probe function when DT and hardware are fully initialized and ready to interact with user space. Fixes: 259230378c65 ("leds: Add LED1202 I2C driver") Signed-off-by: Manuel Fombuena Link: https://lore.kernel.org/r/CWLP123MB54732771AC0CE5491B3C84DCC5C32@CWLP123MB5473.GBRP123.PROD.OUTLOOK.COM Signed-off-by: Lee Jones --- drivers/leds/leds-st1202.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/leds/leds-st1202.c b/drivers/leds/leds-st1202.c index b691c4886993f..e894b3f9a0f46 100644 --- a/drivers/leds/leds-st1202.c +++ b/drivers/leds/leds-st1202.c @@ -261,8 +261,6 @@ static int st1202_dt_init(struct st1202_chip *chip) int err, reg; for_each_available_child_of_node_scoped(dev_of_node(dev), child) { - struct led_init_data init_data = {}; - err = of_property_read_u32(child, "reg", ®); if (err) return dev_err_probe(dev, err, "Invalid register\n"); @@ -276,15 +274,6 @@ static int st1202_dt_init(struct st1202_chip *chip) led->led_cdev.pattern_set = st1202_led_pattern_set; led->led_cdev.pattern_clear = st1202_led_pattern_clear; led->led_cdev.default_trigger = "pattern"; - - init_data.fwnode = led->fwnode; - init_data.devicename = "st1202"; - init_data.default_label = ":"; - - err = devm_led_classdev_register_ext(dev, &led->led_cdev, &init_data); - if (err < 0) - return dev_err_probe(dev, err, "Failed to register LED class device\n"); - led->led_cdev.brightness_set = st1202_brightness_set; led->led_cdev.brightness_get = st1202_brightness_get; } @@ -368,6 +357,7 @@ static int st1202_probe(struct i2c_client *client) return ret; for (int i = 0; i < ST1202_MAX_LEDS; i++) { + struct led_init_data init_data = {}; led = &chip->leds[i]; led->chip = chip; led->led_num = i; @@ -384,6 +374,15 @@ static int st1202_probe(struct i2c_client *client) if (ret < 0) 
return dev_err_probe(&client->dev, ret, "Failed to clear LED pattern\n"); + + init_data.fwnode = led->fwnode; + init_data.devicename = "st1202"; + init_data.default_label = ":"; + + ret = devm_led_classdev_register_ext(&client->dev, &led->led_cdev, &init_data); + if (ret < 0) + return dev_err_probe(&client->dev, ret, + "Failed to register LED class device\n"); } return 0; From 3414cda9d41f41703832d0abd01063dd8de82b89 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Thu, 27 Feb 2025 18:51:06 +0100 Subject: [PATCH 119/503] ALSA: hda/realtek: Remove (revert) duplicate Ally X config In commit 1e9c708dc3ae ("ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects") Baojun adds a bunch of projects to the file, including for the Ally X. Turns out the initial Ally X was not sorted properly, so the kernel had 2 quirks for it. The previous quirk overrode the new one due to being earlier and they are different. When AB testing, the normal pin fixup seems to work ok but causes a bit of a minor popping. Given the other config is more complicated and may cause undefined behavior, revert it. 
Fixes: 1e9c708dc3ae ("ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects") Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20250227175107.33432-2-lkml@antheas.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c735f630ecb5a..f92de4a95a4c6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7690,7 +7690,6 @@ enum { ALC285_FIXUP_THINKPAD_X1_GEN7, ALC285_FIXUP_THINKPAD_HEADSET_JACK, ALC294_FIXUP_ASUS_ALLY, - ALC294_FIXUP_ASUS_ALLY_X, ALC294_FIXUP_ASUS_ALLY_PINS, ALC294_FIXUP_ASUS_ALLY_VERBS, ALC294_FIXUP_ASUS_ALLY_SPEAKER, @@ -9138,12 +9137,6 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS }, - [ALC294_FIXUP_ASUS_ALLY_X] = { - .type = HDA_FIXUP_FUNC, - .v.func = tas2781_fixup_i2c, - .chained = true, - .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS - }, [ALC294_FIXUP_ASUS_ALLY_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -10645,7 +10638,6 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally NR2301L/X", ALC294_FIXUP_ASUS_ALLY), - SND_PCI_QUIRK(0x1043, 0x1eb3, "ROG Ally X RC72LA", ALC294_FIXUP_ASUS_ALLY_X), SND_PCI_QUIRK(0x1043, 0x1863, "ASUS UX6404VI/VV", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), From 12784ca33b62fd327631749e6a0cd2a10110a56c Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Thu, 27 Feb 2025 18:51:07 +0100 Subject: [PATCH 120/503] ALSA: hda/realtek: Fix Asus Z13 2025 audio Use the basic quirk for this type of amplifier. 
Sound works in speakers, headphones, and microphone, whereas none worked before. Tested-by: Kyle Gospodnetich Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20250227175107.33432-3-lkml@antheas.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f92de4a95a4c6..926007b4a9ba7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10692,6 +10692,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), From f479ecc5ef15ed8d774968c1a8726a49420f11a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20B=C3=A1rta?= Date: Thu, 27 Feb 2025 17:12:55 +0100 Subject: [PATCH 121/503] ALSA: hda: Fix speakers on ASUS EXPERTBOOK P5405CSA 1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After some digging around I have found that this laptop has Cirrus's smart amplifiers connected to SPI bus (spi1-CSC3551:00-cs35l41-hda). To get them correctly detected and working I had to modify patch_realtek.c with ASUS EXPERTBOOK P5405CSA 1.0 SystemID (0x1043, 0x1f63) and add corresponding hda_quirk (ALC245_FIXUP_CS35L41_SPI_2).
Signed-off-by: Daniel Bárta Link: https://patch.msgid.link/20250227161256.18061-2-daniel.barta@trustlab.cz Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 926007b4a9ba7..d58743b955f81 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10691,6 +10691,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f63, "ASUS P5405CSA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), From be45bc4eff33d9a7dae84a2150f242a91a617402 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 08:54:41 -0800 Subject: [PATCH 122/503] KVM: SVM: Set RFLAGS.IF=1 in C code, to get VMRUN out of the STI shadow Enable/disable local IRQs, i.e. set/clear RFLAGS.IF, in the common svm_vcpu_enter_exit() just after/before guest_state_{enter,exit}_irqoff() so that VMRUN is not executed in an STI shadow. AMD CPUs have a quirk (some would say "bug"), where the STI shadow bleeds into the guest's intr_state field if a #VMEXIT occurs during injection of an event, i.e. if the VMRUN doesn't complete before the subsequent #VMEXIT. The spurious "interrupts masked" state is relatively benign, as it only occurs during event injection and is transient. 
Because KVM is already injecting an event, the guest can't be in HLT, and if KVM is querying IRQ blocking for injection, then KVM would need to force an immediate exit anyways since injecting multiple events is impossible. However, because KVM copies int_state verbatim from vmcb02 to vmcb12, the spurious STI shadow is visible to L1 when running a nested VM, which can trip sanity checks, e.g. in VMware's VMM. Hoist the STI+CLI all the way to C code, as the aforementioned calls to guest_state_{enter,exit}_irqoff() already inform lockdep that IRQs are enabled/disabled, and taking a fault on VMRUN with RFLAGS.IF=1 is already possible. I.e. if there's kernel code that is confused by running with RFLAGS.IF=1, then it's already a problem. In practice, since GIF=0 also blocks NMIs, the only change in exposure to non-KVM code (relative to surrounding VMRUN with STI+CLI) is exception handling code, and except for the kvm_rebooting=1 case, all exceptions in the core VM-Enter/VM-Exit path are fatal. Use the "raw" variants to enable/disable IRQs to avoid tracing in the "no instrumentation" code; the guest state helpers also take care of tracing IRQ state. Opportunistically document why KVM needs to do STI in the first place.
Reported-by: Doug Covelli Closes: https://lore.kernel.org/all/CADH9ctBs1YPmE4aCfGPNBwA10cA8RuAk2gO7542DjMZgs4uzJQ@mail.gmail.com Fixes: f14eec0a3203 ("KVM: SVM: move more vmentry code to assembly") Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20250224165442.2338294-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ arch/x86/kvm/svm/vmenter.S | 10 +--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a713c803a3a37..0d299f3f921e6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4189,6 +4189,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + /* + * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of + * VMRUN controls whether or not physical IRQs are masked (KVM always + * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the + * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow + * into guest state if delivery of an event during VMRUN triggers a + * #VMEXIT, and the guest_state transitions already tell lockdep that + * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of + * this path, so IRQs aren't actually unmasked while running host code. 
+ */ + raw_local_irq_enable(); + amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) @@ -4197,6 +4209,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in else __svm_vcpu_run(svm, spec_ctrl_intercepted); + raw_local_irq_disable(); + guest_state_exit_irqoff(); } diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 2ed80aea3bb13..0c61153b275f6 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -170,12 +170,8 @@ SYM_FUNC_START(__svm_vcpu_run) mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ - sti - 3: vmrun %_ASM_AX 4: - cli - /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX @@ -340,12 +336,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ - sti - 1: vmrun %rax - -2: cli - +2: /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT From f3513a335e71296a1851167b4e3b0e2bf09fc5f1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 08:54:42 -0800 Subject: [PATCH 123/503] KVM: selftests: Assert that STI blocking isn't set after event injection Add an L1 (guest) assert to the nested exceptions test to verify that KVM doesn't put VMRUN in an STI shadow (AMD CPUs bleed the shadow into the guest's int_state if a #VMEXIT occurs before VMRUN fully completes). Add a similar assert to the VMX side as well, because why not. 
Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20250224165442.2338294-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/nested_exceptions_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa397..3641a42934acb 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) From ee89e8013383d50a27ea9bf3c8a69eed6799856f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:06 -0800 Subject: [PATCH 124/503] KVM: SVM: Drop DEBUGCTL[5:2] from guest's effective value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop bits 5:2 from the guest's effective DEBUGCTL value, as AMD changed the architectural behavior of the bits and broke backwards compatibility. On CPUs without BusLockTrap (or at least, in APMs from before ~2023), bits 5:2 controlled the behavior of external pins: Performance-Monitoring/Breakpoint Pin-Control (PBi)—Bits 5:2, read/write. 
Software uses thesebits to control the type of information reported by the four external performance-monitoring/breakpoint pins on the processor. When a PBi bit is cleared to 0, the corresponding external pin (BPi) reports performance-monitor information. When a PBi bit is set to 1, the corresponding external pin (BPi) reports breakpoint information. With the introduction of BusLockTrap, presumably to be compatible with Intel CPUs, AMD redefined bit 2 to be BLCKDB: Bus Lock #DB Trap (BLCKDB)—Bit 2, read/write. Software sets this bit to enable generation of a #DB trap following successful execution of a bus lock when CPL is > 0. and redefined bits 5:3 (and bit 6) as "6:3 Reserved MBZ". Ideally, KVM would treat bits 5:2 as reserved. Defer that change to a feature cleanup to avoid breaking existing guest in LTS kernels. For now, drop the bits to retain backwards compatibility (of a sort). Note, dropping bits 5:2 is still a guest-visible change, e.g. if the guest is enabling LBRs *and* the legacy PBi bits, then the state of the PBi bits is visible to the guest, whereas now the guest will always see '0'. Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 12 ++++++++++++ arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0d299f3f921e6..bdafbde1f211a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3165,6 +3165,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; } + + /* + * AMD changed the architectural behavior of bits 5:2. On CPUs + * without BusLockTrap, bits 5:2 control "external pins", but + * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap + * and bits 5:3 are reserved-to-zero. 
Sadly, old KVM allowed + * the guest to set bits 5:2 despite not actually virtualizing + * Performance-Monitoring/Breakpoint external pins. Drop bits + * 5:2 for backwards compatibility. + */ + data &= ~GENMASK(5, 2); + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9d7cdb8fbf872..3a931d3885e71 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) extern bool dump_invalid_vmcb; From d0eac42f5cecce009d315655bee341304fbe075e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:07 -0800 Subject: [PATCH 125/503] KVM: SVM: Suppress DEBUGCTL.BTF on AMD Mark BTF as reserved in DEBUGCTL on AMD, as KVM doesn't actually support BTF, and fully enabling BTF virtualization is non-trivial due to interactions with the emulator, guest_debug, #DB interception, nested SVM, etc. Don't inject #GP if the guest attempts to set BTF, as there's no way to communicate lack of support to the guest, and instead suppress the flag and treat the WRMSR as (partially) unsupported. In short, make KVM behave the same on AMD and Intel (VMX already squashes BTF). Note, due to other bugs in KVM's handling of DEBUGCTL, the only way BTF has "worked" in any capacity is if the guest simultaneously enables LBRs. 
Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bdafbde1f211a..ed48465186961 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3177,6 +3177,15 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) */ data &= ~GENMASK(5, 2); + /* + * Suppress BTF as KVM doesn't virtualize BTF, but there's no + * way to communicate lack of support to the guest. + */ + if (data & DEBUGCTLMSR_BTF) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + data &= ~DEBUGCTLMSR_BTF; + } + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 3a931d3885e71..ea44c1da5a7c9 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) +#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) extern bool dump_invalid_vmcb; From fb71c795935652fa20eaf9517ca9547f5af99a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:08 -0800 Subject: [PATCH 126/503] KVM: x86: Snapshot the host's DEBUGCTL in common x86 Move KVM's snapshot of DEBUGCTL to kvm_vcpu_arch and take the snapshot in common x86, so that SVM can also use the snapshot. Opportunistically change the field to a u64. While bits 63:32 are reserved on AMD, not mentioned at all in Intel's SDM, and managed as an "unsigned long" by the kernel, DEBUGCTL is an MSR and therefore a 64-bit value. 
Reviewed-by: Xiaoyao Li Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 8 ++------ arch/x86/kvm/vmx/vmx.h | 2 -- arch/x86/kvm/x86.c | 1 + 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b7af5902ff75..32ae3aa50c7e3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -780,6 +780,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6c56d5235f0f3..3b92f893b2392 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1514,16 +1514,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, */ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_debugctlmsr = get_debugctlmsr(); } void vmx_vcpu_put(struct kvm_vcpu *vcpu) @@ -7458,8 +7454,8 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) } /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. 
Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); + if (vcpu->arch.host_debugctl) + update_debugctlmsr(vcpu->arch.host_debugctl); #ifndef CONFIG_X86_64 /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8b111ce1087c7..951e44dc9d0ea 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -340,8 +340,6 @@ struct vcpu_vmx { /* apic deadline value in host tsc */ u64 hv_deadline_tsc; - unsigned long host_debugctlmsr; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02159c967d29e..5c6fd0edc41f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4968,6 +4968,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); + vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { From 433265870ab3455b418885bff48fa5fd02f7e448 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:09 -0800 Subject: [PATCH 127/503] KVM: SVM: Manually context switch DEBUGCTL if LBR virtualization is disabled Manually load the guest's DEBUGCTL prior to VMRUN (and restore the host's value on #VMEXIT) if it diverges from the host's value and LBR virtualization is disabled, as hardware only context switches DEBUGCTL if LBR virtualization is fully enabled. Running the guest with the host's value has likely been mildly problematic for quite some time, e.g. it will result in undesirable behavior if BTF diverges (with the caveat that KVM now suppresses guest BTF due to lack of support). 
But the bug became fatal with the introduction of Bus Lock Trap ("Detect" in kernel parlance) support for AMD (commit 408eb7417a92 ("x86/bus_lock: Add support for AMD")), as a bus lock in the guest will trigger an unexpected #DB. Note, suppressing the bus lock #DB, i.e. simply resuming the guest without injecting a #DB, is not an option. It wouldn't address the general issue with DEBUGCTL, e.g. for things like BTF, and there are other guest-visible side effects if BusLockTrap is left enabled. If BusLockTrap is disabled, then DR6.BLD is reserved-to-1; any attempts to clear it by software are ignored. But if BusLockTrap is enabled, software can clear DR6.BLD: Software enables bus lock trap by setting DebugCtl MSR[BLCKDB] (bit 2) to 1. When bus lock trap is enabled, ... The processor indicates that this #DB was caused by a bus lock by clearing DR6[BLD] (bit 11). DR6[11] previously had been defined to be always 1. and clearing DR6.BLD is "sticky" in that it's not set (i.e. lowered) by other #DBs: All other #DB exceptions leave DR6[BLD] unmodified E.g. leaving BusLockTrap enabled can confuse a legacy guest that writes '0' to reset DR6.
Reported-by: rangemachine@gmail.com Reported-by: whanos@sergal.fun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219787 Closes: https://lore.kernel.org/all/bug-219787-28872@https.bugzilla.kernel.org%2F Cc: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ed48465186961..e67de787fc714 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4288,6 +4288,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, clgi(); kvm_load_guest_xsave_state(vcpu); + /* + * Hardware only context switches DEBUGCTL if LBR virtualization is + * enabled. Manually load DEBUGCTL if necessary (and restore it after + * VM-Exit), as running with the host's DEBUGCTL can negatively affect + * guest state and can even be fatal, e.g. due to Bus Lock Detect. + */ + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(svm->vmcb->save.dbgctl); + kvm_wait_lapic_expire(vcpu); /* @@ -4315,6 +4325,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(vcpu->arch.host_debugctl); + kvm_load_host_xsave_state(vcpu); stgi(); From 189ecdb3e112da703ac0699f4ec76aa78122f911 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:10 -0800 Subject: [PATCH 128/503] KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs Snapshot the host's DEBUGCTL after disabling IRQs, as perf can toggle debugctl bits from IRQ context, e.g. 
when enabling/disabling events via smp_call_function_single(). Taking the snapshot (long) before IRQs are disabled could result in KVM effectively clobbering DEBUGCTL due to using a stale snapshot. Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c6fd0edc41f4..12d5f47c1bbe9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4968,7 +4968,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); - vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { @@ -10969,6 +10968,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } + vcpu->arch.host_debugctl = get_debugctlmsr(); + guest_timing_enter_irqoff(); for (;;) { From 4a4f9b5c7c13601c4f1b3d8c607d7439e39f40d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 15:58:30 -0500 Subject: [PATCH 129/503] bcachefs: Don't set BCH_FEATURE_incompat_version_field unless requested We shouldn't be setting incompatible bits or the incompatible version field unless explicitly request or allowed - otherwise we break mounting with old kernels or userspace. 
Reported-by: Dave Hansen Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 24 ++++++++++++++++-------- fs/bcachefs/super-io.h | 11 ++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8037ccbacf6af..a81a7b6c09897 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -69,14 +69,20 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta return v; } -void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) +bool bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) { - mutex_lock(&c->sb_lock); - SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, - max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bool ret = (c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed; + + if (ret) { + mutex_lock(&c->sb_lock); + SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + + return ret; } const char * const bch2_sb_fields[] = { @@ -1219,9 +1225,11 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); - if (incompat) + if (incompat) { SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); + } } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index f1ab4f9437203..b4cff9ebdebbf 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ 
-21,17 +21,14 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); -void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); +bool bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); static inline bool bch2_request_incompat_feature(struct bch_fs *c, enum bcachefs_metadata_version version) { - if (unlikely(version > c->sb.version_incompat)) { - if (version > c->sb.version_incompat_allowed) - return false; - bch2_set_version_incompat(c, version); - } - return true; + return likely(version <= c->sb.version_incompat) + ? true + : bch2_set_version_incompat(c, version); } static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) From ee01b2f2d7d0010787c2343463965bbc283a497f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Feb 2025 18:13:42 +0100 Subject: [PATCH 130/503] net: gso: fix ownership in __udp_gso_segment In __udp_gso_segment the skb destructor is removed before segmenting the skb but the socket reference is kept as-is. This is an issue if the original skb is later orphaned as we can hit the following bug: kernel BUG at ./include/linux/skbuff.h:3312! (skb_orphan) RIP: 0010:ip_rcv_core+0x8b2/0xca0 Call Trace: ip_rcv+0xab/0x6e0 __netif_receive_skb_one_core+0x168/0x1b0 process_backlog+0x384/0x1100 __napi_poll.constprop.0+0xa1/0x370 net_rx_action+0x925/0xe50 The above can happen following a sequence of events when using OpenVSwitch, when an OVS_ACTION_ATTR_USERSPACE action precedes an OVS_ACTION_ATTR_OUTPUT action: 1. OVS_ACTION_ATTR_USERSPACE is handled (in do_execute_actions): the skb goes through queue_gso_packets and then __udp_gso_segment, where its destructor is removed. 2. The segments' data are copied and sent to userspace. 3. 
OVS_ACTION_ATTR_OUTPUT is handled (in do_execute_actions) and the same original skb is sent to its path. 4. If it later hits skb_orphan, we hit the bug. Fix this by also removing the reference to the socket in __udp_gso_segment. Fixes: ad405857b174 ("udp: better wmem accounting on gso") Signed-off-by: Antoine Tenart Link: https://patch.msgid.link/20250226171352.258045-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a5be6e4ed326f..ecfca59f31f13 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -321,13 +321,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, /* clear destructor to avoid skb_segment assigning it to tail */ copy_dtor = gso_skb->destructor == sock_wfree; - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = NULL; + gso_skb->sk = NULL; + } segs = skb_segment(gso_skb, features); if (IS_ERR_OR_NULL(segs)) { - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = sock_wfree; + gso_skb->sk = sk; + } return segs; } From a466fd7e9fafd975949e5945e2f70c33a94b1a70 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Thu, 27 Feb 2025 23:46:27 +0500 Subject: [PATCH 131/503] caif_virtio: fix wrong pointer check in cfv_probe() del_vqs() frees virtqueues, therefore cfv->vq_tx pointer should be checked for NULL before calling it, not cfv->vdev. Also the current implementation is redundant because the pointer cfv->vdev is dereferenced before it is checked for NULL. Fix this by checking cfv->vq_tx for NULL instead of cfv->vdev before calling del_vqs(). 
Fixes: 0d2e1a2926b1 ("caif_virtio: Introduce caif over virtio") Signed-off-by: Vitaliy Shevtsov Reviewed-by: Gerhard Engleder Link: https://patch.msgid.link/20250227184716.4715-1-v.shevtsov@mt-integration.ru Signed-off-by: Jakub Kicinski --- drivers/net/caif/caif_virtio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 7fea00c7ca8a6..c60386bf2d1a4 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -745,7 +745,7 @@ static int cfv_probe(struct virtio_device *vdev) if (cfv->vr_rx) vdev->vringh_config->del_vrhs(cfv->vdev); - if (cfv->vdev) + if (cfv->vq_tx) vdev->config->del_vqs(cfv->vdev); free_netdev(netdev); return err; From 2565e42539b120b81a68a58da961ce5d1e34eac8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2024 14:39:11 +0100 Subject: [PATCH 132/503] perf/core: Fix pmus_lock vs. pmus_srcu ordering Commit a63fbed776c7 ("perf/tracing/cpuhotplug: Fix locking order") placed pmus_lock inside pmus_srcu, this makes perf_pmu_unregister() trip lockdep. Move the locking about such that only pmu_idr and pmus (list) are modified while holding pmus_lock. This avoids doing synchronize_srcu() while holding pmus_lock and all is well again. 
Fixes: a63fbed776c7 ("perf/tracing/cpuhotplug: Fix locking order") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241104135517.679556858@infradead.org --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6364319e2f888..11793d690cbb7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11939,6 +11939,8 @@ void perf_pmu_unregister(struct pmu *pmu) { mutex_lock(&pmus_lock); list_del_rcu(&pmu->entry); + idr_remove(&pmu_idr, pmu->type); + mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -11948,7 +11950,6 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - idr_remove(&pmu_idr, pmu->type); if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); @@ -11956,7 +11957,6 @@ void perf_pmu_unregister(struct pmu *pmu) put_device(pmu->dev); } free_pmu_context(pmu); - mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); From 003659fec9f6d8c04738cb74b5384398ae8a7e88 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2024 14:39:12 +0100 Subject: [PATCH 133/503] perf/core: Fix perf_pmu_register() vs. perf_init_event() There is a fairly obvious race between perf_init_event() doing idr_find() and perf_pmu_register() doing idr_alloc() with an incompletely initialized PMU pointer. Avoid by doing idr_alloc() on a NULL pointer to register the id, and swizzling the real struct pmu pointer at the end using idr_replace(). Also making sure to not set struct pmu members after publishing the struct pmu, duh. [ introduce idr_cmpxchg() in order to better handle the idr_replace() error case -- if it were to return an unexpected pointer, it will already have replaced the value and there is no going back. 
] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241104135517.858805880@infradead.org --- kernel/events/core.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 11793d690cbb7..823aa08249161 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11830,6 +11830,21 @@ static int pmu_dev_alloc(struct pmu *pmu) static struct lock_class_key cpuctx_mutex; static struct lock_class_key cpuctx_lock; +static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new) +{ + void *tmp, *val = idr_find(idr, id); + + if (val != old) + return false; + + tmp = idr_replace(idr, new, id); + if (IS_ERR(tmp)) + return false; + + WARN_ON_ONCE(tmp != val); + return true; +} + int perf_pmu_register(struct pmu *pmu, const char *name, int type) { int cpu, ret, max = PERF_TYPE_MAX; @@ -11856,7 +11871,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (type >= 0) max = type; - ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL); if (ret < 0) goto free_pdc; @@ -11864,6 +11879,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) type = ret; pmu->type = type; + atomic_set(&pmu->exclusive_cnt, 0); if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); @@ -11912,14 +11928,22 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; + /* + * Now that the PMU is complete, make it visible to perf_try_init_event(). 
+ */ + if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu)) + goto free_context; list_add_rcu(&pmu->entry, &pmus); - atomic_set(&pmu->exclusive_cnt, 0); + ret = 0; unlock: mutex_unlock(&pmus_lock); return ret; +free_context: + free_percpu(pmu->cpu_pmu_context); + free_dev: if (pmu->dev && pmu->dev != PMU_NULL_DEV) { device_del(pmu->dev); From c9ce148ea753bef66686460fa3cec6641cdfbb9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Mar 2025 12:45:29 +0100 Subject: [PATCH 134/503] ALSA: seq: Avoid module auto-load handling at event delivery snd_seq_client_use_ptr() is supposed to return the snd_seq_client object for the given client ID, and it tries to handle the module auto-loading when no matching object is found. Although the module handling is performed only conditionally with "!in_interrupt()", this condition may be fragile, e.g. when the code is called from the ALSA timer callback where the spinlock is temporarily disabled while the irq is disabled. Then his doesn't fit well and spews the error about sleep from invalid context, as complained recently by syzbot. Also, in general, handling the module-loading at each time if no matching object is found is really an overkill. It can be still useful when performed at the top-level ioctl or proc reads, but it shouldn't be done at event delivery at all. For addressing the issues above, this patch disables the module handling in snd_seq_client_use_ptr() in normal cases like event deliveries, but allow only in limited and safe situations. A new function client_load_and_use_ptr() is used for the cases where the module loading can be done safely, instead. 
Reported-by: syzbot+4cb9fad083898f54c517@syzkaller.appspotmail.com Closes: https://lore.kernel.org/67c272e5.050a0220.dc10f.0159.GAE@google.com Cc: Link: https://patch.msgid.link/20250301114530.8975-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 46 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index cb66ec42a3f8a..706f53e39b53c 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -106,7 +106,7 @@ static struct snd_seq_client *clientptr(int clientid) return clienttab[clientid]; } -struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +static struct snd_seq_client *client_use_ptr(int clientid, bool load_module) { unsigned long flags; struct snd_seq_client *client; @@ -126,7 +126,7 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) } spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES - if (!in_interrupt()) { + if (load_module) { static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); static DECLARE_BITMAP(card_requested, SNDRV_CARDS); @@ -168,6 +168,20 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) return client; } +/* get snd_seq_client object for the given id quickly */ +struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +{ + return client_use_ptr(clientid, false); +} + +/* get snd_seq_client object for the given id; + * if not found, retry after loading the modules + */ +static struct snd_seq_client *client_load_and_use_ptr(int clientid) +{ + return client_use_ptr(clientid, IS_ENABLED(CONFIG_MODULES)); +} + /* Take refcount and perform ioctl_mutex lock on the given client; * used only for OSS sequencer * Unlock via snd_seq_client_ioctl_unlock() below @@ -176,7 +190,7 @@ bool snd_seq_client_ioctl_lock(int clientid) { struct snd_seq_client *client; - client = snd_seq_client_use_ptr(clientid); + client = 
client_load_and_use_ptr(clientid); if (!client) return false; mutex_lock(&client->ioctl_mutex); @@ -1195,7 +1209,7 @@ static int snd_seq_ioctl_running_mode(struct snd_seq_client *client, void *arg) int err = 0; /* requested client number */ - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ @@ -1257,7 +1271,7 @@ static int snd_seq_ioctl_get_client_info(struct snd_seq_client *client, struct snd_seq_client *cptr; /* requested client number */ - cptr = snd_seq_client_use_ptr(client_info->client); + cptr = client_load_and_use_ptr(client_info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ @@ -1396,7 +1410,7 @@ static int snd_seq_ioctl_get_port_info(struct snd_seq_client *client, void *arg) struct snd_seq_client *cptr; struct snd_seq_client_port *port; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -1503,10 +1517,10 @@ static int snd_seq_ioctl_subscribe_port(struct snd_seq_client *client, struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; - receiver = snd_seq_client_use_ptr(subs->dest.client); + receiver = client_load_and_use_ptr(subs->dest.client); if (!receiver) goto __end; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -1871,7 +1885,7 @@ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, struct snd_seq_client_pool *info = arg; struct snd_seq_client *cptr; - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; memset(info, 0, sizeof(*info)); @@ -1975,7 +1989,7 @@ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, struct 
snd_seq_client_port *sport = NULL; result = -EINVAL; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -2006,7 +2020,7 @@ static int snd_seq_ioctl_query_subs(struct snd_seq_client *client, void *arg) struct list_head *p; int i; - cptr = snd_seq_client_use_ptr(subs->root.client); + cptr = client_load_and_use_ptr(subs->root.client); if (!cptr) goto __end; port = snd_seq_port_use_ptr(cptr, subs->root.port); @@ -2073,7 +2087,7 @@ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr) break; /* found */ } @@ -2096,7 +2110,7 @@ static int snd_seq_ioctl_query_next_port(struct snd_seq_client *client, struct snd_seq_client *cptr; struct snd_seq_client_port *port = NULL; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -2193,7 +2207,7 @@ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, size = sizeof(struct snd_ump_endpoint_info); else size = sizeof(struct snd_ump_block_info); - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (!cptr) return -ENOENT; @@ -2475,7 +2489,7 @@ int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, if (check_event_type_and_length(ev)) return -EINVAL; - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (cptr == NULL) return -EINVAL; @@ -2707,7 +2721,7 @@ void snd_seq_info_clients_read(struct snd_info_entry *entry, /* list the client table */ for (c = 0; c < SNDRV_SEQ_MAX_CLIENTS; c++) { - client = snd_seq_client_use_ptr(c); + client = client_load_and_use_ptr(c); if (client == NULL) 
continue; if (client->type == NO_CLIENT) { From 7a68b55ff39b0a1638acb1694c185d49f6077a0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 27 Feb 2025 18:05:25 +0000 Subject: [PATCH 135/503] KVM: arm64: Initialize HCR_EL2.E2H early On CPUs without FEAT_E2H0, HCR_EL2.E2H is RES1, but may reset to an UNKNOWN value out of reset and consequently may not read as 1 unless it has been explicitly initialized. We handled this for the head.S boot code in commits: 3944382fa6f22b54 ("arm64: Treat HCR_EL2.E2H as RES1 when ID_AA64MMFR4_EL1.E2H0 is negative") b3320142f3db9b3f ("arm64: Fix early handling of FEAT_E2H0 not being implemented") Unfortunately, we forgot to apply a similar fix to the KVM PSCI entry points used when relaying CPU_ON, CPU_SUSPEND, and SYSTEM SUSPEND. When KVM is entered via these entry points, the value of HCR_EL2.E2H may be consumed before it has been initialized (e.g. by the 'init_el2_state' macro). Initialize HCR_EL2.E2H early in these paths such that it can be consumed reliably. The existing code in head.S is factored out into a new 'init_el2_hcr' macro, and this is used in the __kvm_hyp_init_cpu() function common to all the relevant PSCI entry points. For clarity, I've tweaked the assembly used to check whether ID_AA64MMFR4_EL1.E2H0 is negative. The bitfield is extracted as a signed value, and this is checked with a signed-greater-or-equal (GE) comparison. As the hyp code will reconfigure HCR_EL2 later in ___kvm_hyp_init(), all bits other than E2H are initialized to zero in __kvm_hyp_init_cpu(). 
Fixes: 3944382fa6f22b54 ("arm64: Treat HCR_EL2.E2H as RES1 when ID_AA64MMFR4_EL1.E2H0 is negative") Fixes: b3320142f3db9b3f ("arm64: Fix early handling of FEAT_E2H0 not being implemented") Signed-off-by: Mark Rutland Cc: Ahmed Genidi Cc: Ben Horgan Cc: Catalin Marinas Cc: Leo Yan Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250227180526.1204723-2-mark.rutland@arm.com [maz: fixed LT->GE thinko] Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/el2_setup.h | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 19 +------------------ arch/arm64/kvm/hyp/nvhe/hyp-init.S | 8 +++++++- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 25e1626517500..56034a394b437 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -16,6 +16,32 @@ #include #include +.macro init_el2_hcr val + mov_q x0, \val + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it + * can reset into an UNKNOWN state and might not read as 1 until it has + * been initialized explicitly. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + * + * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * indicating whether the CPU is running in E2H mode. 
+ */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH + cmp x1, #0 + b.ge .LnVHE_\@ + + orr x0, x0, #HCR_E2H +.LnVHE_\@: + msr hcr_el2, x0 + isb +.endm + .macro __init_el2_sctlr mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 5ab1970ee5436..2d56459d6c94c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -298,25 +298,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 isb 0: - mov_q x0, HCR_HOST_NVHE_FLAGS - - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. Publish the E2H bit early so that - * it can be picked up by the init_el2_state macro. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x1, SYS_ID_AA64MMFR4_EL1 - tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - - orr x0, x0, #HCR_E2H -1: - msr hcr_el2, x0 - isb + init_el2_hcr HCR_HOST_NVHE_FLAGS init_el2_state /* Hypervisor stub */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index fc18662260676..3fb5504a7d7fc 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -73,8 +73,12 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +/* + * Initialize EL2 CPU state to sane values. + * + * HCR_EL2.E2H must have been initialized already. + */ SYM_CODE_START_LOCAL(__kvm_init_el2_state) - /* Initialize EL2 CPU state to sane values. 
*/ init_el2_state // Clobbers x0..x2 finalise_el2_state ret @@ -206,6 +210,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} + init_el2_hcr 0 + bl __kvm_init_el2_state __init_el2_nvhe_prepare_eret From 3855a7b91d42ebf3513b7ccffc44807274978b3d Mon Sep 17 00:00:00 2001 From: Ahmed Genidi Date: Thu, 27 Feb 2025 18:05:26 +0000 Subject: [PATCH 136/503] KVM: arm64: Initialize SCTLR_EL1 in __kvm_hyp_init_cpu() When KVM is in protected mode, host calls to PSCI are proxied via EL2, and cold entries from CPU_ON, CPU_SUSPEND, and SYSTEM_SUSPEND bounce through __kvm_hyp_init_cpu() at EL2 before entering the host kernel's entry point at EL1. While __kvm_hyp_init_cpu() initializes SPSR_EL2 for the exception return to EL1, it does not initialize SCTLR_EL1. Due to this, it's possible to enter EL1 with SCTLR_EL1 in an UNKNOWN state. In practice this has been seen to result in kernel crashes after CPU_ON as a result of SCTLR_EL1.M being 1 in violation of the initial core configuration specified by PSCI. Fix this by initializing SCTLR_EL1 for cold entry to the host kernel. As it's necessary to write to SCTLR_EL12 in VHE mode, this initialization is moved into __kvm_host_psci_cpu_entry() where we can use write_sysreg_el1(). The remnants of the '__init_el2_nvhe_prepare_eret' macro are folded into its only caller, as this is clearer than having the macro. 
Fixes: cdf367192766ad11 ("KVM: arm64: Intercept host's CPU_ON SMCs") Reported-by: Leo Yan Signed-off-by: Ahmed Genidi [ Mark: clarify commit message, handle E2H, move to C, remove macro ] Signed-off-by: Mark Rutland Cc: Ahmed Genidi Cc: Ben Horgan Cc: Catalin Marinas Cc: Leo Yan Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Reviewed-by: Leo Yan Link: https://lore.kernel.org/r/20250227180526.1204723-3-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/el2_setup.h | 5 ----- arch/arm64/kernel/head.S | 3 ++- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 -- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +++ 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 56034a394b437..555c613fd2324 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -270,11 +270,6 @@ .Lskip_gcs_\@: .endm -.macro __init_el2_nvhe_prepare_eret - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 -.endm - .macro __init_el2_mpam /* Memory Partitioning And Monitoring: disable EL2 traps */ mrs x1, id_aa64pfr0_el1 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2d56459d6c94c..2ce73525de2c9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -322,7 +322,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el1, x1 mov x2, xzr 3: - __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 mov w0, #BOOT_CPU_MODE_EL2 orr x0, x0, x2 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 3fb5504a7d7fc..f8af11189572f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -214,8 +214,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) bl __kvm_init_el2_state - __init_el2_nvhe_prepare_eret - /* Enable MMU, set vectors and stack. 
*/ mov x0, x28 bl ___kvm_hyp_init // Clobbers x0..x2 diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 9c2ce1e0e99a5..c3e196fb8b18f 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) if (is_cpu_on) release_boot_args(boot_args); + write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR); + write_sysreg(INIT_PSTATE_EL1, SPSR_EL2); + __host_enter(host_ctxt); } From e04918dc594669068f5d59d567d08db531167188 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Mar 2025 15:18:24 +0800 Subject: [PATCH 137/503] cred: Fix RCU warnings in override/revert_creds Fix RCU warnings in override_creds and revert_creds by turning the RCU pointer into a normal pointer using rcu_replace_pointer. These warnings were previously private to the cred code, but due to the move into the header file they are now polluting unrelated subsystems. Fixes: 49dffdfde462 ("cred: Add a light version of override/revert_creds()") Signed-off-by: Herbert Xu Link: https://lore.kernel.org/r/Z8QGQGW0IaSklKG7@gondor.apana.org.au Signed-off-by: Christian Brauner --- include/linux/cred.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 0c3c4b16b469c..5658a3bfe803c 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -172,18 +172,12 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) static inline const struct cred *override_creds(const struct cred *override_cred) { - const struct cred *old = current->cred; - - rcu_assign_pointer(current->cred, override_cred); - return old; + return rcu_replace_pointer(current->cred, override_cred, 1); } static inline const struct cred *revert_creds(const struct cred *revert_cred) { - const struct cred *override_cred = current->cred; - - rcu_assign_pointer(current->cred, revert_cred); - return 
override_cred; + return rcu_replace_pointer(current->cred, revert_cred, 1); } /** From 6b6e2e8fd0de3fa7c6f4f8fe6841b01770b2e7bc Mon Sep 17 00:00:00 2001 From: Titus Rwantare Date: Thu, 27 Feb 2025 22:24:55 +0000 Subject: [PATCH 138/503] hwmon: (pmbus) Initialise page count in pmbus_identify() The `pmbus_identify()` function fails to correctly determine the number of supported pages on PMBus devices. This occurs because `info->pages` is implicitly zero-initialised, and `pmbus_set_page()` does not perform writes to the page register if `info->pages` is not yet initialised. Without this patch, `info->pages` is always set to the maximum after scanning. This patch initialises `info->pages` to `PMBUS_PAGES` before the probing loop, enabling `pmbus_set_page()` writes to make it out onto the bus correctly identifying the number of pages. `PMBUS_PAGES` seemed like a reasonable non-zero number because that's the current result of the identification process. Testing was done with a PMBus device in QEMU. 
Signed-off-by: Titus Rwantare Fixes: 442aba78728e7 ("hwmon: PMBus device driver") Link: https://lore.kernel.org/r/20250227222455.2583468-1-titusr@google.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 77cf268e7d2d6..920cd5408141a 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -103,6 +103,8 @@ static int pmbus_identify(struct i2c_client *client, if (pmbus_check_byte_register(client, 0, PMBUS_PAGE)) { int page; + info->pages = PMBUS_PAGES; + for (page = 1; page < PMBUS_PAGES; page++) { if (pmbus_set_page(client, page, 0xff) < 0) break; From 1c7932d5ae0f5c22fa52ac811b4c427bbca5aff5 Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Thu, 27 Feb 2025 13:57:53 +0100 Subject: [PATCH 139/503] hwmon: (ntc_thermistor) Fix the ncpXXxh103 sensor table I could not find a single table that has the values currently present in the table, change it to the actual values that can be found in [1]/[2] and [3] (page 15 column 2) [1]: https://www.murata.com/products/productdetail?partno=NCP15XH103F03RC [2]: https://www.murata.com/products/productdata/8796836626462/NTHCG83.txt?1437969843000 [3]: https://nl.mouser.com/datasheet/2/281/r44e-522712.pdf Fixes: 54ce3a0d8011 ("hwmon: (ntc_thermistor) Add support for ncpXXxh103") Signed-off-by: Maud Spierings Link: https://lore.kernel.org/r/20250227-ntc_thermistor_fixes-v1-3-70fa73200b52@gocontroll.com Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 66 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index b5352900463fb..0d29c8f97ba7c 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -181,40 +181,40 @@ static const struct ntc_compensation ncpXXwf104[] = { }; static const struct ntc_compensation 
ncpXXxh103[] = { - { .temp_c = -40, .ohm = 247565 }, - { .temp_c = -35, .ohm = 181742 }, - { .temp_c = -30, .ohm = 135128 }, - { .temp_c = -25, .ohm = 101678 }, - { .temp_c = -20, .ohm = 77373 }, - { .temp_c = -15, .ohm = 59504 }, - { .temp_c = -10, .ohm = 46222 }, - { .temp_c = -5, .ohm = 36244 }, - { .temp_c = 0, .ohm = 28674 }, - { .temp_c = 5, .ohm = 22878 }, - { .temp_c = 10, .ohm = 18399 }, - { .temp_c = 15, .ohm = 14910 }, - { .temp_c = 20, .ohm = 12169 }, + { .temp_c = -40, .ohm = 195652 }, + { .temp_c = -35, .ohm = 148171 }, + { .temp_c = -30, .ohm = 113347 }, + { .temp_c = -25, .ohm = 87559 }, + { .temp_c = -20, .ohm = 68237 }, + { .temp_c = -15, .ohm = 53650 }, + { .temp_c = -10, .ohm = 42506 }, + { .temp_c = -5, .ohm = 33892 }, + { .temp_c = 0, .ohm = 27219 }, + { .temp_c = 5, .ohm = 22021 }, + { .temp_c = 10, .ohm = 17926 }, + { .temp_c = 15, .ohm = 14674 }, + { .temp_c = 20, .ohm = 12081 }, { .temp_c = 25, .ohm = 10000 }, - { .temp_c = 30, .ohm = 8271 }, - { .temp_c = 35, .ohm = 6883 }, - { .temp_c = 40, .ohm = 5762 }, - { .temp_c = 45, .ohm = 4851 }, - { .temp_c = 50, .ohm = 4105 }, - { .temp_c = 55, .ohm = 3492 }, - { .temp_c = 60, .ohm = 2985 }, - { .temp_c = 65, .ohm = 2563 }, - { .temp_c = 70, .ohm = 2211 }, - { .temp_c = 75, .ohm = 1915 }, - { .temp_c = 80, .ohm = 1666 }, - { .temp_c = 85, .ohm = 1454 }, - { .temp_c = 90, .ohm = 1275 }, - { .temp_c = 95, .ohm = 1121 }, - { .temp_c = 100, .ohm = 990 }, - { .temp_c = 105, .ohm = 876 }, - { .temp_c = 110, .ohm = 779 }, - { .temp_c = 115, .ohm = 694 }, - { .temp_c = 120, .ohm = 620 }, - { .temp_c = 125, .ohm = 556 }, + { .temp_c = 30, .ohm = 8315 }, + { .temp_c = 35, .ohm = 6948 }, + { .temp_c = 40, .ohm = 5834 }, + { .temp_c = 45, .ohm = 4917 }, + { .temp_c = 50, .ohm = 4161 }, + { .temp_c = 55, .ohm = 3535 }, + { .temp_c = 60, .ohm = 3014 }, + { .temp_c = 65, .ohm = 2586 }, + { .temp_c = 70, .ohm = 2228 }, + { .temp_c = 75, .ohm = 1925 }, + { .temp_c = 80, .ohm = 1669 }, + { .temp_c = 85, .ohm = 
1452 }, + { .temp_c = 90, .ohm = 1268 }, + { .temp_c = 95, .ohm = 1110 }, + { .temp_c = 100, .ohm = 974 }, + { .temp_c = 105, .ohm = 858 }, + { .temp_c = 110, .ohm = 758 }, + { .temp_c = 115, .ohm = 672 }, + { .temp_c = 120, .ohm = 596 }, + { .temp_c = 125, .ohm = 531 }, }; /* From e278d5e8aef4c0a1d9a9fa8b8910d713a89aa800 Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Mon, 24 Feb 2025 09:19:04 +0000 Subject: [PATCH 140/503] hwmon: (ad7314) Validate leading zero bits and return error Leading zero bits are sent on the bus before the temperature value is transmitted. If any of these bits are high, the connection might be unstable or there could be no AD7314 / ADT730x (or compatible) at all. Return -EIO in that case. Signed-off-by: Erik Schumacher Fixes: 4f3a659581cab ("hwmon: AD7314 driver (ported from IIO)") Link: https://lore.kernel.org/r/24a50c2981a318580aca8f50d23be7987b69ea00.camel@iris-sensing.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ad7314.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index 7802bbf5f9587..59424103f6348 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -22,11 +22,13 @@ */ #define AD7314_TEMP_MASK 0x7FE0 #define AD7314_TEMP_SHIFT 5 +#define AD7314_LEADING_ZEROS_MASK BIT(15) /* * ADT7301 and ADT7302 temperature masks */ #define ADT7301_TEMP_MASK 0x3FFF +#define ADT7301_LEADING_ZEROS_MASK (BIT(15) | BIT(14)) enum ad7314_variant { adt7301, @@ -65,12 +67,20 @@ static ssize_t ad7314_temperature_show(struct device *dev, return ret; switch (spi_get_device_id(chip->spi_dev)->driver_data) { case ad7314: + if (ret & AD7314_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } data = (ret & AD7314_TEMP_MASK) >> AD7314_TEMP_SHIFT; data = sign_extend32(data, 9); return sprintf(buf, "%d\n", 250 * data); case adt7301: case adt7302: + if (ret & ADT7301_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is 
missing */ + return -EIO; + } /* * Documented as a 13 bit twos complement register * with a sign bit - which is a 14 bit 2's complement From fd5ba38390c59e1c147480ae49b6133c4ac24001 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:19:18 +0900 Subject: [PATCH 141/503] tracing: probe-events: Remove unused MAX_ARG_BUF_LEN macro Commit 18b1e870a496 ("tracing/probes: Add $arg* meta argument for all function args") introduced MAX_ARG_BUF_LEN but it is not used. Remove it. Link: https://lore.kernel.org/all/174055075876.4079315.8805416872155957588.stgit@mhiramat.tok.corp.google.com/ Fixes: 18b1e870a496 ("tracing/probes: Add $arg* meta argument for all function args") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_probe.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index c47ca002347a7..96792bc4b0924 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -36,7 +36,6 @@ #define MAX_BTF_ARGS_LEN 128 #define MAX_DENTRY_ARGS_LEN 256 #define MAX_STRING_SIZE PATH_MAX -#define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" From 4dd541f9d9e4d8cdfa9797e68d893b0c27e4c46c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 15 Feb 2025 11:01:49 +0900 Subject: [PATCH 142/503] MAINTAINERS: update email address in cifs and ksmbd entry Steve mainly checks his email through his gmail address. I also check issues through another email address. 
Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8e0736dc2ee0e..ca11a553d4121 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5775,6 +5775,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French +M: Steve French R: Paulo Alcantara (DFS, global name space) R: Ronnie Sahlberg (directory leases, sparse files) R: Shyam Prasad N (multichannel) @@ -12655,7 +12656,9 @@ F: tools/testing/selftests/ KERNEL SMB3 SERVER (KSMBD) M: Namjae Jeon +M: Namjae Jeon M: Steve French +M: Steve French R: Sergey Senozhatsky R: Tom Talpey L: linux-cifs@vger.kernel.org From d6e13e19063db24f94b690159d0633aaf72a0f03 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 18 Feb 2025 22:49:50 +0900 Subject: [PATCH 143/503] ksmbd: fix out-of-bounds in parse_sec_desc() If osidoffset, gsidoffset and dacloffset could be greater than smb_ntsd struct size. If it is smaller, It could cause slab-out-of-bounds. And when validating sid, It need to check it included subauth array size. 
Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index d39d3e553366d..89415b02dd649 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -807,6 +807,13 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return -EINVAL; } + if (!psid->num_subauth) + return 0; + + if (psid->num_subauth > SID_MAX_SUB_AUTHORITIES || + end_of_acl < (char *)psid + 8 + sizeof(__le32) * psid->num_subauth) + return -EINVAL; + return 0; } @@ -848,6 +855,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type = cpu_to_le16(DACL_PRESENT); if (pntsd->osidoffset) { + if (le32_to_cpu(pntsd->osidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(owner_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d parsing Owner SID\n", __func__, rc); @@ -863,6 +873,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, } if (pntsd->gsidoffset) { + if (le32_to_cpu(pntsd->gsidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(group_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d mapping Owner SID to gid\n", @@ -884,6 +897,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type |= cpu_to_le16(DACL_PROTECTED); if (dacloffset) { + if (dacloffset < sizeof(struct smb_ntsd)) + return -EINVAL; + parse_dacl(idmap, dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr); } From e2ff19f0b7a30e03516e6eb73b948e27a55bc9d2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 21 Feb 2025 14:16:23 +0900 Subject: [PATCH 144/503] ksmbd: fix type confusion via race condition when using ipc_msg_send_request req->handle is allocated using ksmbd_acquire_id(&ipc_ida), based on ida_alloc. 
req->handle from ksmbd_ipc_login_request and FSCTL_PIPE_TRANSCEIVE ioctl can be same and it could lead to type confusion between messages, resulting in access to unexpected parts of memory after an incorrect delivery. ksmbd checks the type of the ipc response but is missing a continue to move on to the next ipc response. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_ipc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 0460ebea6ff02..3f185ae60dc51 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -281,6 +281,7 @@ static int handle_response(int type, void *payload, size_t sz) if (entry->type + 1 != type) { pr_err("Waiting for IPC type %d, got %d. Ignore.\n", entry->type + 1, type); + continue; } entry->response = kvzalloc(sz, KSMBD_DEFAULT_GFP); From 84d2d1641b71dec326e8736a749b7ee76a9599fc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 26 Feb 2025 15:44:02 +0900 Subject: [PATCH 145/503] ksmbd: fix use-after-free in smb2_lock If smb_lock->zero_len is set, ->llist of smb_lock is not deleted and flock is the old one. It will cause a use-after-free in the error handling routine. 
Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f1efcd0274750..35bed8fc1b970 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7458,13 +7458,13 @@ int smb2_lock(struct ksmbd_work *work) } no_check_cl: + flock = smb_lock->fl; + list_del(&smb_lock->llist); + if (smb_lock->zero_len) { err = 0; goto skip; } - - flock = smb_lock->fl; - list_del(&smb_lock->llist); retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: From e26e2d2e15daf1ab33e0135caf2304a0cfa2744b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 27 Feb 2025 15:49:10 +0900 Subject: [PATCH 146/503] ksmbd: fix bug on trap in smb2_lock If lock count is greater than 1, flags could be old value. It should be checked with flags of smb_lock, not flags. It will cause bug-on trap from locks_free_lock in error handling routine. 
Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 35bed8fc1b970..c53121538990e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7468,7 +7468,7 @@ int smb2_lock(struct ksmbd_work *work) retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: - if (flags & SMB2_LOCKFLAG_UNLOCK) { + if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { From 62e7dd0a39c2d0d7ff03274c36df971f1b3d2d0d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 23:26:09 +0900 Subject: [PATCH 147/503] smb: common: change the data type of num_aces to le16 2.4.5 in [MS-DTYP].pdf describe the data type of num_aces as le16. AceCount (2 bytes): An unsigned 16-bit integer that specifies the count of the number of ACE records in the ACL. Change it to le16 and add reserved field to smb_acl struct. 
Reported-by: Igor Leite Ladessa Tested-by: Igor Leite Ladessa Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 26 +++++++++++++------------- fs/smb/common/smbacl.h | 3 ++- fs/smb/server/smbacl.c | 31 ++++++++++++++++--------------- fs/smb/server/smbacl.h | 2 +- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 699a3f76d0834..7d953208046af 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -763,7 +763,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, struct cifs_fattr *fattr, bool mode_from_special_sid) { int i; - int num_aces = 0; + u16 num_aces = 0; int acl_size; char *acl_base; struct smb_ace **ppace; @@ -785,7 +785,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. 
DACL has no ACEs, @@ -795,7 +795,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces > 0) { umode_t denied_mode = 0; @@ -937,12 +937,12 @@ unsigned int setup_special_user_owner_ACE(struct smb_ace *pntace) static void populate_new_aces(char *nacl_base, struct smb_sid *pownersid, struct smb_sid *pgrpsid, - __u64 *pnmode, u32 *pnum_aces, u16 *pnsize, + __u64 *pnmode, u16 *pnum_aces, u16 *pnsize, bool modefromsid, bool posix) { __u64 nmode; - u32 num_aces = 0; + u16 num_aces = 0; u16 nsize = 0; __u64 user_mode; __u64 group_mode; @@ -1050,7 +1050,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; @@ -1058,7 +1058,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); nacl_base = (char *)pndacl; nsize = sizeof(struct smb_acl); @@ -1090,11 +1090,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; - u32 num_aces = 0; + u16 num_aces = 0; bool new_aces_set = false; /* Assuming that pndacl and pnmode are never NULL */ @@ -1112,7 +1112,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); /* Retain old ACEs which we can 
retain */ for (i = 0; i < src_num_aces; ++i) { @@ -1158,7 +1158,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, } finalize_dacl: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(nsize); return 0; @@ -1293,7 +1293,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); ndacl_ptr->size = cpu_to_le16(0); - ndacl_ptr->num_aces = cpu_to_le32(0); + ndacl_ptr->num_aces = cpu_to_le16(0); rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr, pnmode, mode_from_sid, posix); @@ -1653,7 +1653,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += - le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); + le16_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); else /* cifsacl */ nsecdesclen += le16_to_cpu(dacl_ptr->size); } diff --git a/fs/smb/common/smbacl.h b/fs/smb/common/smbacl.h index 6a60698fc6f0f..a624ec9e4a144 100644 --- a/fs/smb/common/smbacl.h +++ b/fs/smb/common/smbacl.h @@ -107,7 +107,8 @@ struct smb_sid { struct smb_acl { __le16 revision; /* revision level */ __le16 size; - __le32 num_aces; + __le16 num_aces; + __le16 reserved; } __attribute__((packed)); struct smb_ace { diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 89415b02dd649..561f80d3f953e 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -333,7 +333,7 @@ void posix_state_to_acl(struct posix_acl_state *state, pace->e_perm = state->other.allow; } -int init_acl_state(struct posix_acl_state *state, int cnt) +int init_acl_state(struct posix_acl_state *state, u16 cnt) { int alloc; @@ -368,7 +368,7 @@ static void parse_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { int i, ret; - int num_aces = 0; + u16 num_aces = 0; unsigned int acl_size; char *acl_base; 
struct smb_ace **ppace; @@ -389,12 +389,12 @@ static void parse_dacl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces <= 0) return; @@ -580,7 +580,7 @@ static void parse_dacl(struct mnt_idmap *idmap, static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, struct smb_ace *pndace, - struct smb_fattr *fattr, u32 *num_aces, + struct smb_fattr *fattr, u16 *num_aces, u16 *size, u32 nt_aces_num) { struct posix_acl_entry *pace; @@ -701,7 +701,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { struct smb_ace *ntace, *pndace; - int nt_num_aces = le32_to_cpu(nt_dacl->num_aces), num_aces = 0; + u16 nt_num_aces = le16_to_cpu(nt_dacl->num_aces), num_aces = 0; unsigned short size = 0; int i; @@ -728,7 +728,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, set_posix_acl_entries_dacl(idmap, pndace, fattr, &num_aces, &size, nt_num_aces); - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -736,7 +736,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, struct smb_acl *pndacl, struct smb_fattr *fattr) { struct smb_ace *pace, *pndace; - u32 num_aces = 0; + u16 num_aces = 0; u16 size = 0, ace_size = 0; uid_t uid; const struct smb_sid *sid; @@ -792,7 +792,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, fattr->cf_mode, 0007); out: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -1022,8 +1022,9 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_sid owner_sid, group_sid; struct dentry *parent = 
path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); - int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; - int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; + int rc = 0, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); @@ -1039,7 +1040,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); acl_len = pntsd_size - dacloffset; - num_aces = le32_to_cpu(parent_pdacl->num_aces); + num_aces = le16_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); pdacl_size = le16_to_cpu(parent_pdacl->size); @@ -1199,7 +1200,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); pdacl->revision = cpu_to_le16(2); pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size); - pdacl->num_aces = cpu_to_le32(ace_cnt); + pdacl->num_aces = cpu_to_le16(ace_cnt); pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); memcpy(pace, aces_base, nt_size); pntsd_size += sizeof(struct smb_acl) + nt_size; @@ -1280,7 +1281,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); @@ -1301,7 +1302,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i 
< le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 24ce576fc2924..355adaee39b87 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h @@ -86,7 +86,7 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr); -int init_acl_state(struct posix_acl_state *state, int cnt); +int init_acl_state(struct posix_acl_state *state, u16 cnt); void free_acl_state(struct posix_acl_state *state); void posix_state_to_acl(struct posix_acl_state *state, struct posix_acl_entry *pace); From 1b8b67f3c5e5169535e26efedd3e422172e2db64 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 09:32:11 +0900 Subject: [PATCH 148/503] ksmbd: fix incorrect validation for num_aces field of smb_acl parse_dacl() validates num_aces to allocate posix_ace_state_array. if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) This validation is incorrect, since it still allows creating an array of size ULONG_MAX. smb_acl has a ->size field from which the actual number of aces in the request buffer can be calculated. Use this to check for an invalid num_aces.
Reported-by: Igor Leite Ladessa Tested-by: Igor Leite Ladessa Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 561f80d3f953e..49b128698670a 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -398,7 +398,9 @@ static void parse_dacl(struct mnt_idmap *idmap, if (num_aces <= 0) return; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; ret = init_acl_state(&acl_state, num_aces); @@ -432,6 +434,7 @@ static void parse_dacl(struct mnt_idmap *idmap, offsetof(struct smb_sid, sub_auth); if (end_of_acl - acl_base < acl_size || + ppace[i]->sid.num_subauth == 0 || ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || (end_of_acl - acl_base < acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) || From aa2a739a75ab6f24ef72fb3fdb9192c081eacf06 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 09:37:57 +0900 Subject: [PATCH 149/503] cifs: fix incorrect validation for num_aces field of smb_acl parse_dacl() validates num_aces to allocate the ace array. if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) This validation is incorrect, since it still allows creating an array of size ULONG_MAX. smb_acl has a ->size field from which the actual number of aces in the response buffer can be calculated. Use this to check for an invalid num_aces.
Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 7d953208046af..64bd68f750f84 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -778,7 +778,8 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, } /* validate that we do not go past end of acl */ - if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { + if (end_of_acl < (char *)pdacl + sizeof(struct smb_acl) || + end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { cifs_dbg(VFS, "ACL too small to parse DACL\n"); return; } @@ -799,8 +800,11 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, if (num_aces > 0) { umode_t denied_mode = 0; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; + ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL); if (!ppace) From f603b159231b0c58f0c27ab39348534063d38223 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 3 Mar 2025 14:56:10 +0800 Subject: [PATCH 150/503] ALSA: hda/realtek - add supported Mic Mute LED for Lenovo platform Support Mic Mute LED for ThinkCentre M series. 
Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/c211a2702f1f411e86bd7420d7eebc03@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d58743b955f81..ebf54ef5877a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5053,6 +5053,16 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, } } +static void alc233_fixup_lenovo_low_en_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc233_fixup_lenovo_line2_mic_hotkey(codec, fix, action); +} + static void alc_hp_mute_disable(struct hda_codec *codec, unsigned int delay) { if (delay <= 0) @@ -7621,6 +7631,7 @@ enum { ALC275_FIXUP_DELL_XPS, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED, ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -8615,6 +8626,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_lenovo_line2_mic_hotkey, }, + [ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_lenovo_low_en_micmute_led, + }, [ALC233_FIXUP_INTEL_NUC8_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic, @@ -10906,6 +10921,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3384, "ThinkCentre M90a PRO", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3386, 
"ThinkCentre M90a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3387, "ThinkCentre M70a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), From 59b348be7597c4a9903cb003c69e37df20c04a30 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 28 Feb 2025 16:46:57 +0300 Subject: [PATCH 151/503] wifi: cfg80211: regulatory: improve invalid hints checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzbot keeps reporting an issue [1] that occurs when erroneous symbols sent from userspace get through into user_alpha2[] via regulatory_hint_user() call. Such invalid regulatory hints should be rejected. While a sanity check from commit 47caf685a685 ("cfg80211: regulatory: reject invalid hints") looks to be enough to deter these very cases, there is a way to get around it due to 2 reasons. 1) The way isalpha() works, symbols other than latin lower and upper letters may be used to determine a country/domain. For instance, greek letters will also be considered upper/lower letters and for such characters isalpha() will return true as well. However, ISO-3166-1 alpha2 codes should only hold latin characters. 2) While processing a user regulatory request, between reg_process_hint_user() and regulatory_hint_user() there happens to be a call to queue_regulatory_request() which modifies letters in request->alpha2[] with toupper(). This works fine for latin symbols, less so for weird letter characters from the second part of _ctype[]. Syzbot triggers a warning in is_user_regdom_saved() by first sending over an unexpected non-latin letter that gets malformed by toupper() into a character that ends up failing isalpha() check. 
Prevent this by enhancing is_an_alpha2() to ensure that incoming symbols are latin letters and nothing else. [1] Syzbot report: ------------[ cut here ]------------ Unexpected user alpha2: A� WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 is_user_regdom_saved net/wireless/reg.c:440 [inline] WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_alpha2 net/wireless/reg.c:3424 [inline] WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 Modules linked in: CPU: 1 UID: 0 PID: 964 Comm: kworker/1:2 Not tainted 6.12.0-rc5-syzkaller-00044-gc1e939a21eb1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: events_power_efficient crda_timeout_work RIP: 0010:is_user_regdom_saved net/wireless/reg.c:440 [inline] RIP: 0010:restore_alpha2 net/wireless/reg.c:3424 [inline] RIP: 0010:restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 ... Call Trace: crda_timeout_work+0x27/0x50 net/wireless/reg.c:542 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa65/0x1850 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f2/0x390 kernel/kthread.c:389 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+e10709ac3c44f3d4e800@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e10709ac3c44f3d4e800 Fixes: 09d989d179d0 ("cfg80211: add regulatory hint disconnect support") Cc: stable@kernel.org Signed-off-by: Nikita Zhandarovich Link: https://patch.msgid.link/20250228134659.1577656-1-n.zhandarovich@fintech.ru Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2dd0533e76605..212e9561aae77 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -407,7 +407,8 @@ static bool is_an_alpha2(const 
char *alpha2) { if (!alpha2) return false; - return isalpha(alpha2[0]) && isalpha(alpha2[1]); + return isascii(alpha2[0]) && isalpha(alpha2[0]) && + isascii(alpha2[1]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) From 487cfd4a8e3dc42d34a759017978a4edaf85fce0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Feb 2025 19:45:29 +0000 Subject: [PATCH 152/503] xhci: Restrict USB4 tunnel detection for USB3 devices to Intel hosts When adding support for USB3-over-USB4 tunnelling detection, a check for an Intel-specific capability was added. This capability, which goes by ID 206, is used without any check that we are actually dealing with an Intel host. As it turns out, the Cadence XHCI controller *also* exposes an extended capability numbered 206 (for unknown purposes), but of course doesn't have the Intel-specific registers that the tunnelling code is trying to access. Fun follows. The core of the problems is that the tunnelling code blindly uses vendor-specific capabilities without any check (the Intel-provided documentation I have at hand indicates that 192-255 are indeed vendor-specific). Restrict the detection code to Intel HW for real, preventing any further explosion on my (non-Intel) HW. 
Cc: stable Fixes: 948ce83fbb7df ("xhci: Add USB4 tunnel detection for USB3 devices on Intel hosts") Signed-off-by: Marc Zyngier Acked-by: Mathias Nyman Link: https://lore.kernel.org/r/20250227194529.2288718-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 9693464c05204..69c278b64084b 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "xhci.h" #include "xhci-trace.h" @@ -770,9 +771,16 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) enum usb_link_tunnel_mode xhci_port_is_tunneled(struct xhci_hcd *xhci, struct xhci_port *port) { + struct usb_hcd *hcd; void __iomem *base; u32 offset; + /* Don't try and probe this capability for non-Intel hosts */ + hcd = xhci_to_hcd(xhci); + if (!dev_is_pci(hcd->self.controller) || + to_pci_dev(hcd->self.controller)->vendor != PCI_VENDOR_ID_INTEL) + return USB_LINK_UNKNOWN; + base = &xhci->cap_regs->hc_capbase; offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_INTEL_SPR_SHADOW); From 8e812e9355a6f14dffd54a33d951ca403b9732f5 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 24 Feb 2025 14:26:04 +0530 Subject: [PATCH 153/503] usb: gadget: Check bmAttributes only if configuration is valid If the USB configuration is not valid, then avoid checking for bmAttributes to prevent a NULL pointer dereference.
Cc: stable Fixes: 40e89ff5750f ("usb: gadget: Set self-powered based on MaxPower and bmAttributes") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250224085604.417327-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 4bcf73bae7610..869ad99afb48b 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1051,7 +1051,7 @@ static int set_config(struct usb_composite_dev *cdev, usb_gadget_set_remote_wakeup(gadget, 0); done: if (power > USB_SELF_POWER_VBUS_MAX_DRAW || - !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) + (c && !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER))) usb_gadget_clear_selfpowered(gadget); else usb_gadget_set_selfpowered(gadget); From 69c58deec19628c8a686030102176484eb94fed4 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sun, 16 Feb 2025 22:30:02 +0000 Subject: [PATCH 154/503] usb: dwc3: gadget: Prevent irq storm when TH re-executes While commit d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") makes sure that top half(TH) does not end up overwriting the cached events before processing them when the TH gets invoked more than one time, returning IRQ_HANDLED results in occasional irq storm where the TH hogs the CPU. The irq storm can be prevented by clearing the flag before event handler busy is cleared. Default enable interrupt moderation in all versions which support them. ftrace event stub during dwc3 irq storm: irq/504_dwc3-1111 ( 1111) [000] .... 70.000866: irq_handler_exit: irq=14 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000872: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000874: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000881: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] ....
70.000883: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000889: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000892: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000898: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000901: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000907: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000909: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000915: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000918: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000924: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000927: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000933: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000935: irq_handler_exit: irq=504 ret=handled .... 
Cc: stable Suggested-by: Thinh Nguyen Fixes: d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") Signed-off-by: Badhri Jagan Sridharan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250216223003.3568039-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 16 ++++++---------- drivers/usb/dwc3/gadget.c | 10 +++++++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index dfa1b5fe48dc4..2c472cb97f6c7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1835,8 +1835,6 @@ static void dwc3_get_properties(struct dwc3 *dwc) dwc->tx_thr_num_pkt_prd = tx_thr_num_pkt_prd; dwc->tx_max_burst_prd = tx_max_burst_prd; - dwc->imod_interval = 0; - dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num; } @@ -1854,21 +1852,19 @@ static void dwc3_check_params(struct dwc3 *dwc) unsigned int hwparam_gen = DWC3_GHWPARAMS3_SSPHY_IFC(dwc->hwparams.hwparams3); - /* Check for proper value of imod_interval */ - if (dwc->imod_interval && !dwc3_has_imod(dwc)) { - dev_warn(dwc->dev, "Interrupt moderation not supported\n"); - dwc->imod_interval = 0; - } - /* + * Enable IMOD for all supporting controllers. + * + * Particularly, DWC_usb3 v3.00a must enable this feature for + * the following reason: + * * Workaround for STAR 9000961433 which affects only version * 3.00a of the DWC_usb3 core. This prevents the controller * interrupt from being masked while handling events. IMOD * allows us to work around this issue. Enable it for the * affected version. 
*/ - if (!dwc->imod_interval && - DWC3_VER_IS(DWC3, 300A)) + if (dwc3_has_imod((dwc))) dwc->imod_interval = 1; /* Check the maximum_speed parameter */ diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ddd6b2ce57107..89a4dc8ebf948 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4501,14 +4501,18 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), DWC3_GEVNTSIZ_SIZE(evt->length)); + evt->flags &= ~DWC3_EVENT_PENDING; + /* + * Add an explicit write memory barrier to make sure that the update of + * clearing DWC3_EVENT_PENDING is observed in dwc3_check_event_buf() + */ + wmb(); + if (dwc->imod_interval) { dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), DWC3_GEVNTCOUNT_EHB); dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } - /* Keep the clearing of DWC3_EVENT_PENDING at the end */ - evt->flags &= ~DWC3_EVENT_PENDING; - return ret; } From b5ea08aa883da05106fcc683d12489a4292d1122 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:46 +0200 Subject: [PATCH 155/503] usb: renesas_usbhs: Call clk_put() Clocks acquired with of_clk_get() need to be freed with clk_put(). Call clk_put() on priv->clks[0] on error path. 
Fixes: 3df0e240caba ("usb: renesas_usbhs: Add multiple clocks management") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 935fc496fe94b..6c7857b66a219 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -312,8 +312,10 @@ static int usbhsc_clk_get(struct device *dev, struct usbhs_priv *priv) priv->clks[1] = of_clk_get(dev_of_node(dev), 1); if (PTR_ERR(priv->clks[1]) == -ENOENT) priv->clks[1] = NULL; - else if (IS_ERR(priv->clks[1])) + else if (IS_ERR(priv->clks[1])) { + clk_put(priv->clks[0]); return PTR_ERR(priv->clks[1]); + } return 0; } From e0c92440938930e7fa7aa6362780d39cdea34449 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:47 +0200 Subject: [PATCH 156/503] usb: renesas_usbhs: Use devm_usb_get_phy() The gpriv->transceiver is retrieved in probe() through usb_get_phy() but never released. Use devm_usb_get_phy() to handle this scenario. This issue was identified through code investigation. No issue was found without this change. 
Fixes: b5a2875605ca ("usb: renesas_usbhs: Allow an OTG PHY driver to provide VBUS") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/mod_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 105132ae87acb..e8e5723f54122 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -1094,7 +1094,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) goto usbhs_mod_gadget_probe_err_gpriv; } - gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); + gpriv->transceiver = devm_usb_get_phy(dev, USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", !IS_ERR(gpriv->transceiver) ? "" : "no "); From 552ca6b87e3778f3dd5b87842f95138162e16c82 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:48 +0200 Subject: [PATCH 157/503] usb: renesas_usbhs: Flush the notify_hotplug_work When performing continuous unbind/bind operations on the USB drivers available on the Renesas RZ/G2L SoC, a kernel crash with the message "Unable to handle kernel NULL pointer dereference at virtual address" may occur. This issue points to the usbhsc_notify_hotplug() function. Flush the delayed work to avoid its execution when driver resources are unavailable. 
Fixes: bc57381e6347 ("usb: renesas_usbhs: use delayed_work instead of work_struct") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-4-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 6c7857b66a219..4b35ef216125c 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -781,6 +781,8 @@ static void usbhs_remove(struct platform_device *pdev) dev_dbg(&pdev->dev, "usb remove\n"); + flush_delayed_work(&priv->notify_hotplug_work); + /* power off */ if (!usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); From 2b66ef84d0d2a0ea955b40bd306f5e3abbc5cf9c Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 28 Feb 2025 07:50:25 +0000 Subject: [PATCH 158/503] usb: hub: lack of clearing xHC resources The xHC resources allocated for USB devices are not released in the correct order after resuming in the case where the device was reconnected while suspended. This issue has been detected during the following scenario: - connect hub HS to root port - connect LS/FS device to hub port - wait for enumeration to finish - force host to suspend - reconnect hub attached to root port - wake host For this scenario during enumeration of USB LS/FS device the Cadence xHC reports completion error code for xHC commands because the xHC resources used for devices have not been properly released. XHCI specification doesn't mention that device can be reset in any order so, we should not treat this issue as Cadence xHC controller bug. Similarly to disconnecting, in this case the device resources should be cleared starting from the last usb device in tree toward the root hub.
To fix this issue usbcore driver should call hcd->driver->reset_device for all USB devices connected to hub which was reconnected while suspending. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/PH7PR07MB953841E38C088678ACDCF6EEDDCC2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a76bb50b62026..dcba4281ea486 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6065,6 +6065,36 @@ void usb_hub_cleanup(void) usb_deregister(&hub_driver); } /* usb_hub_cleanup() */ +/** + * hub_hc_release_resources - clear resources used by host controller + * @udev: pointer to device being released + * + * Context: task context, might sleep + * + * Function releases the host controller resources in correct order before + * making any operation on resuming usb device. The host controller resources + * allocated for devices in tree should be released starting from the last + * usb device in tree toward the root hub. This function is used only during + * resuming device when usb device require reinitialization – that is, when + * flag udev->reset_resume is set. + * + * This call is synchronous, and may not be used in an interrupt context. 
+ */ +static void hub_hc_release_resources(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + int i; + + /* Release up resources for all children before this device */ + for (i = 0; i < udev->maxchild; i++) + if (hub->ports[i]->child) + hub_hc_release_resources(hub->ports[i]->child); + + if (hcd->driver->reset_device) + hcd->driver->reset_device(hcd, udev); +} + /** * usb_reset_and_verify_device - perform a USB port reset to reinitialize a device * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) @@ -6129,6 +6159,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; + if (udev->reset_resume) + hub_hc_release_resources(udev); + mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { From 1be4e29e94a6be77de3bc210820b74f40814f17a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 27 Feb 2025 11:03:06 -0600 Subject: [PATCH 159/503] platform/x86/amd/pmf: Initialize and clean up `cb_mutex` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `cb_mutex` was introduced in commit 9e0894d07072e ("platform/x86/amd/pmf: Enable Custom BIOS Inputs for PMF-TA") to prevent concurrent access for BIOS inputs. However, it isn't initialized, so using it may lead to a NULL pointer dereference. Add code to initialize on probe and clean up on destroy.
Reported-by: Yijun Shen Cc: Richard Gong Fixes: 9e0894d07072e ("platform/x86/amd/pmf: Enable Custom BIOS Inputs for PMF-TA") Signed-off-by: Mario Limonciello Tested-By: Yijun Shen Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250227170308.435862-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 764cc1fe90ae4..a2cb2d5544f5b 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -452,6 +452,7 @@ static int amd_pmf_probe(struct platform_device *pdev) mutex_init(&dev->lock); mutex_init(&dev->update_mutex); + mutex_init(&dev->cb_mutex); apmf_acpi_init(dev); platform_set_drvdata(pdev, dev); @@ -477,6 +478,7 @@ static void amd_pmf_remove(struct platform_device *pdev) amd_pmf_dbgfs_unregister(dev); mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); + mutex_destroy(&dev->cb_mutex); kfree(dev->buf); } From df1a1ed5e1bdd9cc13148e0e5549f5ebcf76cf13 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:42:19 +0000 Subject: [PATCH 160/503] drm/imagination: avoid deadlock on fence release Do scheduler queue fence release processing on a workqueue, rather than in the release function itself. 
Fixes deadlock issues such as the following: [ 607.400437] ============================================ [ 607.405755] WARNING: possible recursive locking detected [ 607.415500] -------------------------------------------- [ 607.420817] weston:zfq0/24149 is trying to acquire lock: [ 607.426131] ffff000017d041a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: pvr_gem_object_vunmap+0x40/0xc0 [powervr] [ 607.436728] but task is already holding lock: [ 607.442554] ffff000017d105a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: dma_buf_ioctl+0x250/0x554 [ 607.451727] other info that might help us debug this: [ 607.458245] Possible unsafe locking scenario: [ 607.464155] CPU0 [ 607.466601] ---- [ 607.469044] lock(reservation_ww_class_mutex); [ 607.473584] lock(reservation_ww_class_mutex); [ 607.478114] *** DEADLOCK *** Cc: stable@vger.kernel.org Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-fence-release-deadlock-v2-1-6fed2fc1fe88@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_queue.c | 13 +++++++++++-- drivers/gpu/drm/imagination/pvr_queue.h | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index c4f08432882b1..f3f1c5212df74 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -109,12 +109,20 @@ pvr_queue_fence_get_driver_name(struct dma_fence *f) return PVR_DRIVER_NAME; } +static void pvr_queue_fence_release_work(struct work_struct *w) +{ + struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work); + + pvr_context_put(fence->queue->ctx); + dma_fence_free(&fence->base); +} + static void pvr_queue_fence_release(struct dma_fence *f) { struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + struct 
pvr_device *pvr_dev = fence->queue->ctx->pvr_dev; - pvr_context_put(fence->queue->ctx); - dma_fence_free(f); + queue_work(pvr_dev->sched_wq, &fence->release_work); } static const char * @@ -268,6 +276,7 @@ pvr_queue_fence_init(struct dma_fence *f, pvr_context_get(queue->ctx); fence->queue = queue; + INIT_WORK(&fence->release_work, pvr_queue_fence_release_work); dma_fence_init(&fence->base, fence_ops, &fence_ctx->lock, fence_ctx->id, atomic_inc_return(&fence_ctx->seqno)); diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h index e06ced69302fc..93fe9ac9f58cc 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.h +++ b/drivers/gpu/drm/imagination/pvr_queue.h @@ -5,6 +5,7 @@ #define PVR_QUEUE_H #include +#include #include "pvr_cccb.h" #include "pvr_device.h" @@ -63,6 +64,9 @@ struct pvr_queue_fence { /** @queue: Queue that created this fence. */ struct pvr_queue *queue; + + /** @release_work: Fence release work structure. */ + struct work_struct release_work; }; /** From a5c4c3ba95a52d66315acdfbaba9bd82ed39c250 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:43:06 +0000 Subject: [PATCH 161/503] drm/imagination: Hold drm_gem_gpuva lock for unmap Avoid a warning from drm_gem_gpuva_assert_lock_held in drm_gpuva_unlink. The Imagination driver uses the GEM object reservation lock to protect the gpuva list, but the GEM object was not always known in the code paths that ended up calling drm_gpuva_unlink. When the GEM object isn't known, it is found by calling drm_gpuva_find to lookup the object associated with a given virtual address range, or by calling drm_gpuva_find_first when removing all mappings. 
Cc: stable@vger.kernel.org Fixes: 4bc736f890ce ("drm/imagination: vm: make use of GPUVM's drm_exec helper") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-hold-drm_gem_gpuva-lock-for-unmap-v2-1-3fdacded227f@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw_meta.c | 6 +- drivers/gpu/drm/imagination/pvr_vm.c | 134 +++++++++++++++++----- drivers/gpu/drm/imagination/pvr_vm.h | 3 + 3 files changed, 115 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c index c39beb70c3173..6d13864851fc2 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_meta.c +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c @@ -527,8 +527,10 @@ pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) static void pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) { - pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start, - fw_obj->fw_mm_node.size); + struct pvr_gem_object *pvr_obj = fw_obj->gem; + + pvr_vm_unmap_obj(pvr_dev->kernel_vm_ctx, pvr_obj, + fw_obj->fw_mm_node.start, fw_obj->fw_mm_node.size); } static bool diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 363f885a70982..2896fa7501b1c 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -293,8 +293,9 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op, static int pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, - struct pvr_vm_context *vm_ctx, u64 device_addr, - u64 size) + struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { int err; @@ -318,6 +319,7 @@ pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, goto err_bind_op_fini; } + bind_op->pvr_obj = pvr_obj; bind_op->vm_ctx = vm_ctx; bind_op->device_addr = device_addr; bind_op->size = size; @@ -597,20 +599,6 @@ 
pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) return ERR_PTR(err); } -/** - * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. - * @vm_ctx: Target VM context. - * - * This function ensures that no mappings are left dangling by unmapping them - * all in order of ascending device-virtual address. - */ -void -pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) -{ - WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, - vm_ctx->gpuvm_mgr.mm_range)); -} - /** * pvr_vm_context_release() - Teardown a VM context. * @ref_count: Pointer to reference counter of the VM context. @@ -703,11 +691,7 @@ pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec) struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv; struct pvr_gem_object *pvr_obj = bind_op->pvr_obj; - /* Unmap operations don't have an object to lock. */ - if (!pvr_obj) - return 0; - - /* Acquire lock on the GEM being mapped. */ + /* Acquire lock on the GEM object being mapped/unmapped. */ return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj)); } @@ -772,8 +756,10 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, } /** - * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * pvr_vm_unmap_obj_locked() - Unmap an already mapped section of device-virtual + * memory. * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. * @device_addr: Virtual device address at the start of the target mapping. * @size: Size of the target mapping. * @@ -784,9 +770,13 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, * * Any error encountered while performing internal operations required to * destroy the mapping (returned from pvr_vm_gpuva_unmap or * pvr_vm_gpuva_remap). + * + * The vm_ctx->lock must be held when calling this function. 
*/ -int -pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +static int +pvr_vm_unmap_obj_locked(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { struct pvr_vm_bind_op bind_op = {0}; struct drm_gpuvm_exec vm_exec = { @@ -799,11 +789,13 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) }, }; - int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr, - size); + int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, pvr_obj, + device_addr, size); if (err) return err; + pvr_gem_object_get(pvr_obj); + err = drm_gpuvm_exec_lock(&vm_exec); if (err) goto err_cleanup; @@ -818,6 +810,96 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) return err; } +/** + * pvr_vm_unmap_obj() - Unmap an already mapped section of device-virtual + * memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) +{ + int err; + + mutex_lock(&vm_ctx->lock); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, device_addr, size); + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by drm_gpuva_find, + * * Any error encountered by pvr_vm_unmap_obj_locked. 
+ */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + int err; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find(&vm_ctx->gpuvm_mgr, device_addr, size); + if (va) { + pvr_obj = gem_to_pvr_gem(va->gem.obj); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range); + } else { + err = -ENOENT; + } + + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. + * @vm_ctx: Target VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +void +pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) +{ + mutex_lock(&vm_ctx->lock); + + for (;;) { + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, + vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range); + if (!va) + break; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + + WARN_ON(pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range)); + } + + mutex_unlock(&vm_ctx->lock); +} + /* Static data areas are determined by firmware. 
*/ static const struct drm_pvr_static_data_area static_data_areas[] = { { diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h index 79406243617c1..b0528dffa7f1b 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.h +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -38,6 +38,9 @@ struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, int pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, u64 device_addr, u64 size); +int pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size); int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); void pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx); From 91cf42c63f2d8a9c1bcdfe923218e079b32e1a69 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 3 Mar 2025 10:47:40 +0000 Subject: [PATCH 162/503] spi: microchip-core: prevent RX overflows when transmit size > FIFO size When the size of a transfer exceeds the size of the FIFO (32 bytes), RX overflows will be generated and receive data will be corrupted and warnings will be produced. For example, here's an error generated by a transfer of 36 bytes: spi_master spi0: mchp_corespi_interrupt: RX OVERFLOW: rxlen: 4, txlen: 0 The driver is currently split between handling receiving in the interrupt handler, and sending outside of it. Move all handling out of the interrupt handling, and explicitly link the number of bytes read out of the RX FIFO to the number written into the TX one. This both resolves the overflow problems as well as simplifying the flow of the driver.
CC: stable@vger.kernel.org Fixes: 9ac8d17694b6 ("spi: add support for microchip fpga spi controllers") Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20250303-veal-snooper-712c1dfad336@wendy Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core.c | 41 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c index 5b6af55855efc..62ba0bd9cbb7e 100644 --- a/drivers/spi/spi-microchip-core.c +++ b/drivers/spi/spi-microchip-core.c @@ -70,8 +70,7 @@ #define INT_RX_CHANNEL_OVERFLOW BIT(2) #define INT_TX_CHANNEL_UNDERRUN BIT(3) -#define INT_ENABLE_MASK (CONTROL_RX_DATA_INT | CONTROL_TX_DATA_INT | \ - CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) +#define INT_ENABLE_MASK (CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) #define REG_CONTROL (0x00) #define REG_FRAME_SIZE (0x04) @@ -133,10 +132,15 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi) mchp_corespi_write(spi, REG_CONTROL, control); } -static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi, int fifo_max) { - while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { - u32 data = mchp_corespi_read(spi, REG_RX_DATA); + for (int i = 0; i < fifo_max; i++) { + u32 data; + + while (mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY) + ; + + data = mchp_corespi_read(spi, REG_RX_DATA); spi->rx_len -= spi->n_bytes; @@ -211,11 +215,10 @@ static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len) mchp_corespi_write(spi, REG_FRAMESUP, len); } -static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi, int fifo_max) { - int fifo_max, i = 0; + int i = 0; - fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); mchp_corespi_set_xfer_size(spi, fifo_max); 
while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) { @@ -413,19 +416,6 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id) if (intfield == 0) return IRQ_NONE; - if (intfield & INT_TXDONE) - mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE); - - if (intfield & INT_RXRDY) { - mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); - - if (spi->rx_len) - mchp_corespi_read_fifo(spi); - } - - if (!spi->rx_len && !spi->tx_len) - finalise = true; - if (intfield & INT_RX_CHANNEL_OVERFLOW) { mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW); finalise = true; @@ -512,9 +502,14 @@ static int mchp_corespi_transfer_one(struct spi_controller *host, mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); - while (spi->tx_len) - mchp_corespi_write_fifo(spi); + while (spi->tx_len) { + int fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); + + mchp_corespi_write_fifo(spi, fifo_max); + mchp_corespi_read_fifo(spi, fifo_max); + } + spi_finalize_current_transfer(host); return 1; } From 68c3de7f707e8a70e0a6d8087cf0fe4a3d5dbfb0 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:43:54 +0000 Subject: [PATCH 163/503] drm/imagination: only init job done fences once Ensure job done fences are only initialised once. This fixes a memory manager not clean warning from drm_mm_takedown on module unload. 
Cc: stable@vger.kernel.org Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-init-done-fences-once-v2-1-c1b2f556b329@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_queue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index f3f1c5212df74..43411be930a21 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -313,8 +313,9 @@ pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue) static void pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue) { - pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, - &queue->job_fence_ctx); + if (!fence->ops) + pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, + &queue->job_fence_ctx); } /** From 1d2eabb6616433ccaa13927811bdfa205e91ba60 Mon Sep 17 00:00:00 2001 From: Alessio Belle Date: Fri, 21 Feb 2025 10:49:35 +0000 Subject: [PATCH 164/503] drm/imagination: Fix timestamps in firmware traces When firmware traces are enabled, the firmware dumps 48-bit timestamps for each trace as two 32-bit values, highest 32 bits (of which only 16 useful) first. The driver was reassembling them the other way round i.e. interpreting the first value in memory as the lowest 32 bits, and the second value as the highest 32 bits (then truncated to 16 bits). Due to this, firmware trace dumps showed very large timestamps even for traces recorded shortly after GPU boot. The timestamps in these dumps would also sometimes jump backwards because of the truncation. 
Example trace dumped after loading the powervr module and enabling firmware traces, where each line is commented with the timestamp value in hexadecimal to better show both issues: [93540092739584] : Host Sync Partition marker: 1 // 0x551300000000 [28419798597632] : GPU units deinit // 0x19d900000000 [28548647616512] : GPU deinit // 0x19f700000000 Update logic to reassemble the timestamps halves in the correct order. Fixes: cb56cd610866 ("drm/imagination: Add firmware trace to debugfs") Signed-off-by: Alessio Belle Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250221-fix-fw-trace-timestamps-v1-1-dba4aeb030ca@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index 73707daa4e52d..5dbb636d7d4ff 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -333,8 +333,8 @@ static int fw_trace_seq_show(struct seq_file *s, void *v) if (sf_id == ROGUE_FW_SF_LAST) return -EINVAL; - timestamp = read_fw_trace(trace_seq_data, 1) | - ((u64)read_fw_trace(trace_seq_data, 2) << 32); + timestamp = ((u64)read_fw_trace(trace_seq_data, 1) << 32) | + read_fw_trace(trace_seq_data, 2); timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >> ROGUE_FWT_TIMESTAMP_TIME_SHIFT; From 2738d06fb4f01145b24c542fb06de538ffc56430 Mon Sep 17 00:00:00 2001 From: Dmitry Panchenko Date: Thu, 20 Feb 2025 17:39:31 +0200 Subject: [PATCH 165/503] platform/x86: intel-hid: fix volume buttons on Microsoft Surface Go 4 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Volume buttons on Microsoft Surface Go 4 tablet didn't send any events. Add Surface Go 4 DMI match to button_array_table to fix this. 
Signed-off-by: Dmitry Panchenko Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250220154016.3620917-1-dmitry@d-systems.ee Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 927a2993f6160..88a1a9ff2f344 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -139,6 +139,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), }, }, + { + .ident = "Microsoft Surface Go 4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 4"), + }, + }, { } }; From d0d10eaedcb53740883d7e5d53c5e15c879b48fb Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Sat, 22 Feb 2025 00:48:24 +0800 Subject: [PATCH 166/503] platform/x86: thinkpad_acpi: Add battery quirk for ThinkPad X131e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the dmesg messages from the original reporter: [ 4.964073] ACPI: \_SB_.PCI0.LPCB.EC__.HKEY: BCTG evaluated but flagged as error [ 4.964083] thinkpad_acpi: Error probing battery 2 Lenovo ThinkPad X131e also needs this battery quirk. 
Reported-by: Fan Yang <804284660@qq.com> Tested-by: Fan Yang <804284660@qq.com> Co-developed-by: Xi Ruoyao Signed-off-by: Xi Ruoyao Signed-off-by: Mingcong Bai Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250221164825.77315-1-jeffbai@aosc.io Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 72a10ed2017ce..1cc91173e0127 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9972,6 +9972,7 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = { * Individual addressing is broken on models that expose the * primary battery as BAT1. */ + TPACPI_Q_LNV('G', '8', true), /* ThinkPad X131e */ TPACPI_Q_LNV('8', 'F', true), /* Thinkpad X120e */ TPACPI_Q_LNV('J', '7', true), /* B5400 */ TPACPI_Q_LNV('J', 'I', true), /* Thinkpad 11e */ From f317f38e7fbb15a0d8329289fef8cf034938fb4f Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 26 Feb 2025 13:47:27 -0800 Subject: [PATCH 167/503] platform/x86/intel/vsec: Add Diamond Rapids support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PCI ID for the Diamond Rapids Platforms Signed-off-by: David E. 
Box Link: https://lore.kernel.org/r/20250226214728.1256747-1-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 8272f1dd0fbc0..db3c031d17572 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -404,6 +404,11 @@ static const struct intel_vsec_platform_info oobmsm_info = { .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_SDSI | VSEC_CAP_TPMI, }; +/* DMR OOBMSM info */ +static const struct intel_vsec_platform_info dmr_oobmsm_info = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_TPMI, +}; + /* TGL info */ static const struct intel_vsec_platform_info tgl_info = { .caps = VSEC_CAP_TELEMETRY, @@ -420,6 +425,7 @@ static const struct intel_vsec_platform_info lnl_info = { #define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d #define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d #define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM_DMR 0x09a1 #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d #define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d @@ -430,6 +436,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &oobmsm_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM_DMR, &dmr_oobmsm_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) }, From 172a0f509723fe4741d4b8e9190cf434b18320d8 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Mon, 3 Mar 2025 13:04:13 +0300 Subject: [PATCH 168/503] ALSA: usx2y: validate nrpacks module parameter on probe The module parameter defines number of iso packets per one URB. 
User is allowed to set any value to the parameter of type int, which can lead to various kinds of weird and incorrect behavior like integer overflows, truncations, etc. Number of packets should be a small non-negative number. Since this parameter is read-only, its value can be validated on driver probe. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250303100413.835-1-m.masimov@mt-integration.ru Signed-off-by: Takashi Iwai --- sound/usb/usx2y/usbusx2y.c | 11 +++++++++++ sound/usb/usx2y/usbusx2y.h | 26 ++++++++++++++++++++++++++ sound/usb/usx2y/usbusx2yaudio.c | 27 --------------------------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index 5f81c68fd42b6..5756ff3528a2d 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -151,6 +151,12 @@ static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); +#ifdef USX2Y_NRPACKS_VARIABLE +int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ +module_param(nrpacks, int, 0444); +MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); +#endif + /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... 
*/ @@ -432,6 +438,11 @@ static int snd_usx2y_probe(struct usb_interface *intf, struct snd_card *card; int err; +#ifdef USX2Y_NRPACKS_VARIABLE + if (nrpacks < 0 || nrpacks > USX2Y_NRPACKS_MAX) + return -EINVAL; +#endif + if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && diff --git a/sound/usb/usx2y/usbusx2y.h b/sound/usb/usx2y/usbusx2y.h index 391fd7b4ed5ef..6a76d04bf1c7d 100644 --- a/sound/usb/usx2y/usbusx2y.h +++ b/sound/usb/usx2y/usbusx2y.h @@ -7,6 +7,32 @@ #define NRURBS 2 +/* Default value used for nr of packs per urb. + * 1 to 4 have been tested ok on uhci. + * To use 3 on ohci, you'd need a patch: + * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on + * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" + * + * 1, 2 and 4 work out of the box on ohci, if I recall correctly. + * Bigger is safer operation, smaller gives lower latencies. + */ +#define USX2Y_NRPACKS 4 + +#define USX2Y_NRPACKS_MAX 1024 + +/* If your system works ok with this module's parameter + * nrpacks set to 1, you might as well comment + * this define out, and thereby produce smaller, faster code. + * You'd also set USX2Y_NRPACKS to 1 then. + */ +#define USX2Y_NRPACKS_VARIABLE 1 + +#ifdef USX2Y_NRPACKS_VARIABLE +extern int nrpacks; +#define nr_of_packs() nrpacks +#else +#define nr_of_packs() USX2Y_NRPACKS +#endif #define URBS_ASYNC_SEQ 10 #define URB_DATA_LEN_ASYNC_SEQ 32 diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index f540f46a0b143..acca8bead82e5 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -28,33 +28,6 @@ #include "usx2y.h" #include "usbusx2y.h" -/* Default value used for nr of packs per urb. - * 1 to 4 have been tested ok on uhci. 
- * To use 3 on ohci, you'd need a patch: - * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on - * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" - * - * 1, 2 and 4 work out of the box on ohci, if I recall correctly. - * Bigger is safer operation, smaller gives lower latencies. - */ -#define USX2Y_NRPACKS 4 - -/* If your system works ok with this module's parameter - * nrpacks set to 1, you might as well comment - * this define out, and thereby produce smaller, faster code. - * You'd also set USX2Y_NRPACKS to 1 then. - */ -#define USX2Y_NRPACKS_VARIABLE 1 - -#ifdef USX2Y_NRPACKS_VARIABLE -static int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ -#define nr_of_packs() nrpacks -module_param(nrpacks, int, 0444); -MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); -#else -#define nr_of_packs() USX2Y_NRPACKS -#endif - static int usx2y_urb_capt_retire(struct snd_usx2y_substream *subs) { struct urb *urb = subs->completed_urb; From 5cfe5612ca9590db69b9be29dc83041dbf001108 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 17 Feb 2025 17:02:42 +0100 Subject: [PATCH 169/503] netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template. nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its locking. The refcounter is read and if its value is set to one then the refcounter is incremented and variable is used - otherwise it is already in use and left untouched. Without per-CPU locking in local_bh_disable() on PREEMPT_RT the read-then-increment operation is not atomic and therefore racy. This can be avoided by using unconditionally __refcount_inc() which will increment counter and return the old value as an atomic operation. In case the returned counter is not one, the variable is in use and we need to decrement counter. Otherwise we can use it. Use __refcount_inc() instead of read and a conditional increment. 
Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2e59aba681a13..d526e69a2a2b8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, enum ip_conntrack_info ctinfo; u16 value = nft_reg_load16(&regs->data[priv->sreg]); struct nf_conn *ct; + int oldcnt; ct = nf_ct_get(skb, &ctinfo); if (ct) /* already tracked */ @@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + __refcount_inc(&ct->ct_general.use, &oldcnt); + if (likely(oldcnt == 1)) { nf_ct_zone_add(ct, &zone); } else { + refcount_dec(&ct->ct_general.use); /* previous skb got queued to userspace, allocate temporary * one until percpu template can be reused. */ From 4363f02a39e25e80e68039b4323c570b0848ec66 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 3 Mar 2025 14:55:52 +0800 Subject: [PATCH 170/503] ASoC: Intel: sof_sdw: Fix unlikely uninitialized variable use in create_sdw_dailinks() Initialize current_be_id to 0 to handle the unlikely case when there are no devices connected to a DAI. In this case create_sdw_dailink() would return without touching the passed pointer to current_be_id.
Found by gcc -fanalyzer Fixes: 59bf457d8055 ("ASoC: intel: sof_sdw: Factor out SoundWire DAI creation") Signed-off-by: Peter Ujfalusi Cc: stable@vger.kernel.org Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250303065552.78328-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index c13064c777261..90dafa810b2ec 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -954,7 +954,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card, /* generate DAI links by each sdw link */ while (sof_dais->initialised) { - int current_be_id; + int current_be_id = 0; ret = create_sdw_dailink(card, sof_dais, dai_links, &current_be_id, codec_conf); From d776f016d24816f15033169dcd081f077b6c10f4 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Fri, 21 Feb 2025 04:40:24 +0000 Subject: [PATCH 171/503] ASoC: codecs: wsa884x: report temps to hwmon in millidegree of Celsius MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Temperatures are reported in units of Celsius however hwmon expects values to be in millidegree of Celsius. Userspace tools observe values close to zero and report it as "Not available" or incorrect values like 0C or 1C. Add a simple conversion to fix that. Before the change: wsa884x-virtual-0 Adapter: Virtual device temp1: +0.0°C -- wsa884x-virtual-0 Adapter: Virtual device temp1: +0.0°C Also reported as N/A before first amplifier power on. After this change and initial wsa884x power on: wsa884x-virtual-0 Adapter: Virtual device temp1: +39.0°C -- wsa884x-virtual-0 Adapter: Virtual device temp1: +37.0°C Tested on sm8550 only.
Cc: Krzysztof Kozlowski Cc: Srinivas Kandagatla Signed-off-by: Alexey Klimov Link: https://patch.msgid.link/20250221044024.1207921-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa884x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index 86df5152c547b..560a2c04b6955 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1875,7 +1875,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp) * Reading temperature is possible only when Power Amplifier is * off. Report last cached data. */ - *temp = wsa884x->temperature; + *temp = wsa884x->temperature * 1000; return 0; } @@ -1934,7 +1934,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp) if ((val > WSA884X_LOW_TEMP_THRESHOLD) && (val < WSA884X_HIGH_TEMP_THRESHOLD)) { wsa884x->temperature = val; - *temp = val; + *temp = val * 1000; ret = 0; } else { ret = -EAGAIN; From 3d6c9dd4cb3013fe83524949b914f1497855e3de Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 22 Feb 2025 23:56:59 +0100 Subject: [PATCH 172/503] ASoC: tegra: Fix ADX S24_LE audio format Commit 4204eccc7b2a ("ASoC: tegra: Add support for S24_LE audio format") added support for the S24_LE audio format, but duplicated S16_LE in OUT_DAI() for ADX instead. Fix this by adding support for the S24_LE audio format. Compile-tested only. 
Cc: stable@vger.kernel.org Fixes: 4204eccc7b2a ("ASoC: tegra: Add support for S24_LE audio format") Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250222225700.539673-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_adx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c index 3e6e8f51f380b..0aa93b948378f 100644 --- a/sound/soc/tegra/tegra210_adx.c +++ b/sound/soc/tegra/tegra210_adx.c @@ -264,7 +264,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = { .rates = SNDRV_PCM_RATE_8000_192000, \ .formats = SNDRV_PCM_FMTBIT_S8 | \ SNDRV_PCM_FMTBIT_S16_LE | \ - SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ SNDRV_PCM_FMTBIT_S32_LE, \ }, \ .capture = { \ @@ -274,7 +274,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = { .rates = SNDRV_PCM_RATE_8000_192000, \ .formats = SNDRV_PCM_FMTBIT_S8 | \ SNDRV_PCM_FMTBIT_S16_LE | \ - SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ SNDRV_PCM_FMTBIT_S32_LE, \ }, \ .ops = &tegra210_adx_out_dai_ops, \ From 64e6a754d33d31aa844b3ee66fb93ac84ca1565e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Feb 2025 08:26:42 +0000 Subject: [PATCH 173/503] llc: do not use skb_get() before dev_queue_xmit() syzbot is able to crash hosts [1], using llc and devices not supporting IFF_TX_SKB_SHARING. In this case, e1000 driver calls eth_skb_pad(), while the skb is shared. Simply replace skb_get() by skb_clone() in net/llc/llc_s_ac.c Note that e1000 driver might have an issue with pktgen, because it does not clear IFF_TX_SKB_SHARING, this is an orthogonal change. We need to audit other skb_get() uses in net/llc. [1] kernel BUG at net/core/skbuff.c:2178 ! 
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 UID: 0 PID: 16371 Comm: syz.2.2764 Not tainted 6.14.0-rc4-syzkaller-00052-gac9c34d1e45a #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:pskb_expand_head+0x6ce/0x1240 net/core/skbuff.c:2178 Call Trace: __skb_pad+0x18a/0x610 net/core/skbuff.c:2466 __skb_put_padto include/linux/skbuff.h:3843 [inline] skb_put_padto include/linux/skbuff.h:3862 [inline] eth_skb_pad include/linux/etherdevice.h:656 [inline] e1000_xmit_frame+0x2d99/0x5800 drivers/net/ethernet/intel/e1000/e1000_main.c:3128 __netdev_start_xmit include/linux/netdevice.h:5151 [inline] netdev_start_xmit include/linux/netdevice.h:5160 [inline] xmit_one net/core/dev.c:3806 [inline] dev_hard_start_xmit+0x9a/0x7b0 net/core/dev.c:3822 sch_direct_xmit+0x1ae/0xc30 net/sched/sch_generic.c:343 __dev_xmit_skb net/core/dev.c:4045 [inline] __dev_queue_xmit+0x13d4/0x43e0 net/core/dev.c:4621 dev_queue_xmit include/linux/netdevice.h:3313 [inline] llc_sap_action_send_test_c+0x268/0x320 net/llc/llc_s_ac.c:144 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x239/0x510 net/llc/llc_sap.c:209 llc_ui_sendmsg+0xd0d/0x14e0 net/llc/af_llc.c:993 sock_sendmsg_nosec net/socket.c:718 [inline] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+da65c993ae113742a25f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67c020c0.050a0220.222324.0011.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/llc/llc_s_ac.c | 49 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 06fb8e6944b06..7a0cae9a81114 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -24,7 +24,7 @@ #include #include #include - +#include /** * llc_sap_action_unitdata_ind - forward UI PDU to network layer @@ -40,6 +40,26 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) return 0; } +static int llc_prepare_and_xmit(struct sk_buff *skb) +{ + struct llc_sap_state_ev *ev = llc_sap_ev(skb); + struct sk_buff *nskb; + int rc; + + rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); + if (rc) + return rc; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + + return dev_queue_xmit(nskb); +} + /** * llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer * @sap: SAP @@ -52,17 +72,12 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -77,17 +92,12 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } 
- return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -133,17 +143,12 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) From 10fce7ebe888fa8c97eee7e317a47e7603e5e78d Mon Sep 17 00:00:00 2001 From: Xinghuo Chen Date: Mon, 3 Mar 2025 07:57:33 -0500 Subject: [PATCH 174/503] hwmon: fix a NULL vs IS_ERR_OR_NULL() check in xgene_hwmon_probe() The devm_memremap() function returns error pointers on error, it doesn't return NULL. Fixes: c7cefce03e69 ("hwmon: (xgene) access mailbox as RAM") Signed-off-by: Xinghuo Chen Link: https://lore.kernel.org/r/tencent_9AD8E7683EC29CAC97496B44F3F865BA070A@qq.com Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 1e3bd129a922d..7087197383c96 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -706,7 +706,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) goto out; } - if (!ctx->pcc_comm_addr) { + if (IS_ERR_OR_NULL(ctx->pcc_comm_addr)) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; From 23e0832d6d7be2d3c713f9390c060b6f1c48bf36 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 18 Feb 2025 13:41:50 +0100 Subject: [PATCH 175/503] drm/sched: Fix preprocessor guard When writing the header guard for gpu_scheduler_trace.h, a typo, apparently, occurred. Fix the typo and document the scope of the guard. 
Fixes: 353da3c520b4 ("drm/amdgpu: add tracepoint for scheduler (v2)") Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250218124149.118002-2-phasta@kernel.org --- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index c75302ca3427c..f56e77e7f6d02 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_GPU_SCHED_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _GPU_SCHED_TRACE_H_ #include @@ -106,7 +106,7 @@ TRACE_EVENT(drm_sched_job_wait_dep, __entry->seqno) ); -#endif +#endif /* _GPU_SCHED_TRACE_H_ */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH From b2653cd3b75f62f29b72df4070e20357acb52bc4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2025 17:25:32 -0800 Subject: [PATCH 176/503] KVM: SVM: Save host DR masks on CPUs with DebugSwap When running SEV-SNP guests on a CPU that supports DebugSwap, always save the host's DR0..DR3 mask MSR values irrespective of whether or not DebugSwap is enabled, to ensure the host values aren't clobbered by the CPU. And for now, also save DR0..DR3, even though doing so isn't necessary (see below). SVM_VMGEXIT_AP_CREATE is deeply flawed in that it allows the *guest* to create a VMSA with guest-controlled SEV_FEATURES. A well behaved guest can inform the hypervisor, i.e. KVM, of its "requested" features, but on CPUs without ALLOWED_SEV_FEATURES support, nothing prevents the guest from lying about which SEV features are being enabled (or not!). If a misbehaving guest enables DebugSwap in a secondary vCPU's VMSA, the CPU will load the DR0..DR3 mask MSRs on #VMEXIT, i.e. 
will clobber the MSRs with '0' if KVM doesn't save its desired value. Note, DR0..DR3 themselves are "ok", as DR7 is reset on #VMEXIT, and KVM restores all DRs in common x86 code as needed via hw_breakpoint_restore(). I.e. there is no risk of host DR0..DR3 being clobbered (when it matters). However, there is a flaw in the opposite direction; because the guest can lie about enabling DebugSwap, i.e. can *disable* DebugSwap without KVM's knowledge, KVM must not rely on the CPU to restore DRs. Defer fixing that wart, as it's more of a documentation issue than a bug in the code. Note, KVM added support for DebugSwap on commit d1f85fbe836e ("KVM: SEV: Enable data breakpoints in SEV-ES"), but that is not an appropriate Fixes, as the underlying flaw exists in hardware, not in KVM. I.e. all kernels that support SEV-SNP need to be patched, not just kernels with KVM's full support for DebugSwap (ignoring that DebugSwap support landed first). Opportunistically fix an incorrect statement in the comment; on CPUs without DebugSwap, the CPU does NOT save or load debug registers, i.e. 
Fixes: e366f92ea99e ("KVM: SEV: Support SEV-SNP AP Creation NAE event") Cc: stable@vger.kernel.org Cc: Naveen N Rao Cc: Kim Phillips Cc: Tom Lendacky Cc: Alexey Kardashevskiy Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20250227012541.3234589-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2a794c320503..ef057c85a67ce 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4580,6 +4580,8 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { + struct kvm *kvm = svm->vcpu.kvm; + /* * All host state for SEV-ES guests is categorized into three swap types * based on how it is handled by hardware during a world switch: @@ -4603,10 +4605,15 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by - * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both - * saves and loads debug registers (Type-A). + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does + * not save or load debug registers. Sadly, on CPUs without + * ALLOWED_SEV_FEATURES, KVM can't prevent SNP guests from enabling + * DebugSwap on secondary vCPUs without KVM's knowledge via "AP Create". + * Save all registers if DebugSwap is supported to prevent host state + * from being clobbered by a misbehaving guest. 
*/ - if (sev_vcpu_has_debug_swap(svm)) { + if (sev_vcpu_has_debug_swap(svm) || + (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { hostsa->dr0 = native_get_debugreg(0); hostsa->dr1 = native_get_debugreg(1); hostsa->dr2 = native_get_debugreg(2); From 807cb9ce2ed9a1b6e79e70fb2cdb7860f1517dcc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2025 17:25:33 -0800 Subject: [PATCH 177/503] KVM: SVM: Don't rely on DebugSwap to restore host DR0..DR3 Never rely on the CPU to restore/load host DR0..DR3 values, even if the CPU supports DebugSwap, as there are no guarantees that SNP guests will actually enable DebugSwap on APs. E.g. if KVM were to rely on the CPU to load DR0..DR3 and skipped them during hw_breakpoint_restore(), KVM would run with clobbered-to-zero DRs if an SNP guest created APs without DebugSwap enabled. Update the comment to explain the dangers, and hopefully prevent breaking KVM in the future. Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20250227012541.3234589-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ef057c85a67ce..080f8cecd7ca6 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4606,18 +4606,21 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does - * not save or load debug registers. Sadly, on CPUs without - * ALLOWED_SEV_FEATURES, KVM can't prevent SNP guests from enabling - * DebugSwap on secondary vCPUs without KVM's knowledge via "AP Create". - * Save all registers if DebugSwap is supported to prevent host state - * from being clobbered by a misbehaving guest. + * not save or load debug registers. 
Sadly, KVM can't prevent SNP + * guests from lying about DebugSwap on secondary vCPUs, i.e. the + * SEV_FEATURES provided at "AP Create" isn't guaranteed to match what + * the guest has actually enabled (or not!) in the VMSA. + * + * If DebugSwap is *possible*, save the masks so that they're restored + * if the guest enables DebugSwap. But for the DRs themselves, do NOT + * rely on the CPU to restore the host values; KVM will restore them as + * needed in common code, via hw_breakpoint_restore(). Note, KVM does + * NOT support virtualizing Breakpoint Extensions, i.e. the mask MSRs + * don't need to be restored per se, KVM just needs to ensure they are + * loaded with the correct values *if* the CPU writes the MSRs. */ if (sev_vcpu_has_debug_swap(svm) || (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { - hostsa->dr0 = native_get_debugreg(0); - hostsa->dr1 = native_get_debugreg(1); - hostsa->dr2 = native_get_debugreg(2); - hostsa->dr3 = native_get_debugreg(3); hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); From d88ed5fb7c88f404e57fe2b2a6d19fefc35b4dc7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Feb 2025 15:08:04 -0800 Subject: [PATCH 178/503] KVM: selftests: Ensure all vCPUs hit -EFAULT during initial RO stage During the initial mprotect(RO) stage of mmu_stress_test, keep vCPUs spinning until all vCPUs have hit -EFAULT, i.e. until all vCPUs have tried to write to a read-only page. If a vCPU manages to complete an entire iteration of the loop without hitting a read-only page, *and* the vCPU observes mprotect_ro_done before starting a second iteration, then the vCPU will prematurely fall through to GUEST_SYNC(3) (on x86 and arm64) and get out of sequence. 
Replace the "do-while (!r)" loop around the associated _vcpu_run() with a single invocation, as barring a KVM bug, the vCPU is guaranteed to hit -EFAULT, and retrying on success is super confusing, hides KVM bugs, and complicates this fix. The do-while loop was semi-unintentionally added specifically to fudge around a KVM x86 bug, and said bug is unhittable without modifying the test to force x86 down the !(x86||arm64) path. On x86, if forced emulation is enabled, vcpu_arch_put_guest() may trigger emulation of the store to memory. Due to a (very, very) longstanding bug in KVM x86's emulator, emulated writes to guest memory that fail during __kvm_write_guest_page() unconditionally return KVM_EXIT_MMIO. While that is desirable in the !memslot case, it's wrong in this case as the failure happens due to __copy_to_user() hitting a read-only page, not an emulated MMIO region. But as above, x86 only uses vcpu_arch_put_guest() if the __x86_64__ guards are clobbered to force x86 down the common path, and of course the unexpected MMIO is a KVM bug, i.e. *should* cause a test failure.
Fixes: b6c304aec648 ("KVM: selftests: Verify KVM correctly handles mprotect(PROT_READ)") Reported-by: Yan Zhao Closes: https://lore.kernel.org/all/20250208105318.16861-1-yan.y.zhao@intel.com Debugged-by: Yan Zhao Reviewed-by: Yan Zhao Tested-by: Yan Zhao Link: https://lore.kernel.org/r/20250228230804.3845860-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index d9c76b4c0d88a..6a437d2be9fa4 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -18,6 +18,7 @@ #include "ucall_common.h" static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { @@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) /* * Write to the region while mprotect(PROT_READ) is underway. Keep - * looping until the memory is guaranteed to be read-only, otherwise - * vCPUs may complete their writes and advance to the next stage - * prematurely. + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. 
* * For architectures that support skipping the faulting instruction, * generate the store via inline assembly to ensure the exact length @@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #else vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); #endif - } while (!READ_ONCE(mprotect_ro_done)); + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); /* * Only architectures that write the entire range can explicitly sync, @@ -81,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -148,12 +150,16 @@ static void *vcpu_worker(void *data) * be stuck on the faulting instruction for other architectures. Go to * stage 3 without a rendezvous */ - do { - r = _vcpu_run(vcpu); - } while (!r); + r = _vcpu_run(vcpu); TEST_ASSERT(r == -1 && errno == EFAULT, "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + #if defined(__x86_64__) || defined(__aarch64__) /* * Verify *all* writes from the guest hit EFAULT due to the VMA now @@ -378,7 +384,6 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_run2, "run 2"); mprotect(mem, slot_size, PROT_READ); - usleep(10); mprotect_ro_done = true; sync_global_to_guest(vm, mprotect_ro_done); From 3b2d3db368013729fd2167a0d91fec821dba807c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Feb 2025 15:38:52 -0800 Subject: [PATCH 179/503] KVM: selftests: Fix printf() format goof in SEV smoke test Print out the index of mismatching XSAVE bytes using unsigned decimal format. Some versions of clang complain about trying to print an integer as an unsigned char. 
x86/sev_smoke_test.c:55:51: error: format specifies type 'unsigned char' but the argument has type 'int' [-Werror,-Wformat] Fixes: 8c53183dbaa2 ("selftests: kvm: add test for transferring FPU state into VMSA") Link: https://lore.kernel.org/r/20250228233852.3855676-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index a1a688e752666..d97816dc476a2 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) bool bad = false; for (i = 0; i < 4095; i++) { if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); bad = true; } } From 9360dfe4cbd62ff1eb8217b815964931523b75b3 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 3 Mar 2025 18:51:59 +0100 Subject: [PATCH 180/503] sched_ext: Validate prev_cpu in scx_bpf_select_cpu_dfl() If a BPF scheduler provides an invalid CPU (outside the nr_cpu_ids range) as prev_cpu to scx_bpf_select_cpu_dfl() it can cause a kernel crash. To prevent this, validate prev_cpu in scx_bpf_select_cpu_dfl() and trigger an scx error if an invalid CPU is specified. 
Fixes: f0e1a0643a59b ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 0f1da199cfc7c..7b9dfee858e79 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6422,6 +6422,9 @@ static bool check_builtin_idle_enabled(void) __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) { + if (!ops_cpu_valid(prev_cpu, NULL)) + goto prev_cpu; + if (!check_builtin_idle_enabled()) goto prev_cpu; From cc5bfc4e16fc1d1c520cd7bb28646e82b6e69217 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 30 Jan 2025 23:49:31 +0000 Subject: [PATCH 181/503] usb: dwc3: Set SUSPENDENABLE soon after phy init After phy initialization, some phy operations can only be executed while in lower P states. Ensure GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY are set soon after initialization to avoid blocking phy ops. Previously the SUSPENDENABLE bits are only set after the controller initialization, which may not happen right away if there's no gadget driver or xhci driver bound. Revise this to clear SUSPENDENABLE bits only when there's mode switching (change in GCTL.PRTCAPDIR). 
Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init") Cc: stable Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/633aef0afee7d56d2316f7cc3e1b2a6d518a8cc9.1738280911.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 69 +++++++++++++++++++++++++---------------- drivers/usb/dwc3/core.h | 2 +- drivers/usb/dwc3/drd.c | 4 +-- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2c472cb97f6c7..66a08b5271653 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -131,11 +131,24 @@ void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) } } -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy) { + unsigned int hw_mode; u32 reg; reg = dwc3_readl(dwc->regs, DWC3_GCTL); + + /* + * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY should be cleared during mode switching, + * and they can be set after core initialization. + */ + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && !ignore_susphy) { + if (DWC3_GCTL_PRTCAP(reg) != mode) + dwc3_enable_susphy(dwc, false); + } + reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG)); reg |= DWC3_GCTL_PRTCAPDIR(mode); dwc3_writel(dwc->regs, DWC3_GCTL, reg); @@ -216,7 +229,7 @@ static void __dwc3_set_mode(struct work_struct *work) spin_lock_irqsave(&dwc->lock, flags); - dwc3_set_prtcap(dwc, desired_dr_role); + dwc3_set_prtcap(dwc, desired_dr_role, false); spin_unlock_irqrestore(&dwc->lock, flags); @@ -658,16 +671,7 @@ static int dwc3_ss_phy_setup(struct dwc3 *dwc, int index) */ reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. - * So default value will be '0' when the core is reset. 
Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be - * cleared after power-on reset, and it can be set after core - * initialization. - */ + /* Ensure the GUSB3PIPECTL.SUSPENDENABLE is cleared prior to phy init. */ reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) @@ -747,15 +751,7 @@ static int dwc3_hs_phy_setup(struct dwc3 *dwc, int index) break; } - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. - * So default value will be '0' when the core is reset. Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared - * after power-on reset, and it can be set after core initialization. - */ + /* Ensure the GUSB2PHYCFG.SUSPHY is cleared prior to phy init. */ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) @@ -830,6 +826,25 @@ static int dwc3_phy_init(struct dwc3 *dwc) goto err_exit_usb3_phy; } + /* + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY and DWC3_GUSB2PHYCFG_SUSPHY to '0' during + * coreConsultant configuration. So default value will be '0' when the + * core is reset. Application needs to set it to '1' after the core + * initialization is completed. + * + * Certain phy requires to be in P0 power state during initialization. + * Make sure GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY are clear + * prior to phy init to maintain in the P0 state. + * + * After phy initialization, some phy operations can only be executed + * while in lower P states. Ensure GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY are set soon after initialization to avoid + * blocking phy ops. 
+ */ + if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) + dwc3_enable_susphy(dwc, true); + return 0; err_exit_usb3_phy: @@ -1588,7 +1603,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, false); @@ -1600,7 +1615,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) return dev_err_probe(dev, ret, "failed to initialize gadget\n"); break; case USB_DR_MODE_HOST: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, true); @@ -1645,7 +1660,7 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) } /* de-assert DRVVBUS for HOST and OTG mode */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); } static void dwc3_get_software_properties(struct dwc3 *dwc) @@ -2453,7 +2468,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); dwc3_gadget_resume(dwc); break; case DWC3_GCTL_PRTCAP_HOST: @@ -2461,7 +2476,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, true); break; } /* Restore GUSB2PHYCFG bits that were modified in suspend */ @@ -2490,7 +2505,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, dwc->current_dr_role); + dwc3_set_prtcap(dwc, dwc->current_dr_role, true); dwc3_otg_init(dwc); if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) { diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c955039bb4f62..aaa39e663f60a 100644 --- 
a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1558,7 +1558,7 @@ struct dwc3_gadget_ep_cmd_params { #define DWC3_HAS_OTG BIT(3) /* prototypes */ -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode); +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy); void dwc3_set_mode(struct dwc3 *dwc, u32 mode); u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type); diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index d76ae676783cf..7977860932b14 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -173,7 +173,7 @@ void dwc3_otg_init(struct dwc3 *dwc) * block "Initialize GCTL for OTG operation". */ /* GCTL.PrtCapDir=2'b11 */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* GUSB2PHYCFG0.SusPHY=0 */ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; @@ -556,7 +556,7 @@ int dwc3_drd_init(struct dwc3 *dwc) dwc3_drd_update(dwc); } else { - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* use OTG block to get ID event */ irq = dwc3_otg_get_irq(dwc); From dfd3df31c9db752234d7d2e09bef2aeabb643ce4 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Fri, 28 Feb 2025 13:13:56 +0100 Subject: [PATCH 182/503] mm/slab/kvfree_rcu: Switch to WQ_MEM_RECLAIM wq Currently kvfree_rcu() APIs use a system workqueue which is "system_unbound_wq" to driver RCU machinery to reclaim a memory. Recently, it has been noted that the following kernel warning can be observed: workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120 Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) ... 
CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G E 6.13.2-0_g925d379822da #1 Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023 Workqueue: nvme-wq nvme_scan_work RIP: 0010:check_flush_dependency+0x112/0x120 Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 ... RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082 RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027 RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88 RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400 R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000 CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0 PKRU: 55555554 Call Trace: ? __warn+0xa4/0x140 ? check_flush_dependency+0x112/0x120 ? report_bug+0xe1/0x140 ? check_flush_dependency+0x112/0x120 ? handle_bug+0x5e/0x90 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? timer_recalc_next_expiry+0x190/0x190 ? check_flush_dependency+0x112/0x120 ? check_flush_dependency+0x112/0x120 __flush_work.llvm.1643880146586177030+0x174/0x2c0 flush_rcu_work+0x28/0x30 kvfree_rcu_barrier+0x12f/0x160 kmem_cache_destroy+0x18/0x120 bioset_exit+0x10c/0x150 disk_release.llvm.6740012984264378178+0x61/0xd0 device_release+0x4f/0x90 kobject_put+0x95/0x180 nvme_put_ns+0x23/0xc0 nvme_remove_invalid_namespaces+0xb3/0xd0 nvme_scan_work+0x342/0x490 process_scheduled_works+0x1a2/0x370 worker_thread+0x2ff/0x390 ? pwq_release_workfn+0x1e0/0x1e0 kthread+0xb1/0xe0 ? __kthread_parkme+0x70/0x70 ret_from_fork+0x30/0x40 ? __kthread_parkme+0x70/0x70 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- To address this switch to use of independent WQ_MEM_RECLAIM workqueue, so the rules are not violated from workqueue framework point of view. 
Apart from that, since kvfree_rcu() does reclaim memory, it is worth going with a WQ_MEM_RECLAIM type of wq because it is designed for this purpose. Fixes: 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()") Reported-by: Keith Busch Closes: https://lore.kernel.org/all/Z7iqJtCjHKfo8Kho@kbusch-mbp/ Cc: stable@vger.kernel.org Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Joel Fernandes Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 4030907b6b7d8..4c9f0a87f733b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444); static int rcu_delay_page_cache_fill_msec = 5000; module_param(rcu_delay_page_cache_fill_msec, int, 0444); +static struct workqueue_struct *rcu_reclaim_wq; + /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) if (delayed_work_pending(&krcp->monitor_work)) { delay_left = krcp->monitor_work.timer.expires - jiffies; if (delay < delay_left) - mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); return; } - queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); } static void @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp) // "free channels", the batch can handle. Break // the loop since it is done with this CPU thus // queuing an RCU work is _always_ success here.
- queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work); + queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work); WARN_ON_ONCE(!queued); break; } @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp) if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && !atomic_xchg(&krcp->work_in_progress, 1)) { if (atomic_read(&krcp->backoff_page_cache_fill)) { - queue_delayed_work(system_unbound_wq, + queue_delayed_work(rcu_reclaim_wq, &krcp->page_cache_work, msecs_to_jiffies(rcu_delay_page_cache_fill_msec)); } else { @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void) int i, j; struct shrinker *kfree_rcu_shrinker; + rcu_reclaim_wq = alloc_workqueue("kvfree_rcu_reclaim", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_reclaim_wq); + /* Clamp it to [0:100] seconds interval. */ if (rcu_delay_page_cache_fill_msec < 0 || rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) { From 1a82d19ca2d6835904ee71e2d40fd331098f94a0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 27 Feb 2025 18:41:29 +0200 Subject: [PATCH 183/503] be2net: fix sleeping while atomic bugs in be_ndo_bridge_getlink Partially revert commit b71724147e73 ("be2net: replace polling with sleeping in the FW completion path") w.r.t mcc mutex it introduces and the use of usleep_range. The be2net be_ndo_bridge_getlink() callback is called with rcu_read_lock, so this code has been broken for a long time. Both the mutex_lock and the usleep_range can cause the issue Ian Kumlien reported[1]. 
The call path is: be_ndo_bridge_getlink -> be_cmd_get_hsw_config -> be_mcc_notify_wait -> be_mcc_wait_compl -> usleep_range() [1] https://lore.kernel.org/netdev/CAA85sZveppNgEVa_FD+qhOMtG_AavK9_mFiU+jWrMtXmwqefGA@mail.gmail.com/ Tested-by: Ian Kumlien Fixes: b71724147e73 ("be2net: replace polling with sleeping in the FW completion path") Signed-off-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250227164129.1201164-1-razor@blackwall.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/emulex/benet/be.h | 2 +- drivers/net/ethernet/emulex/benet/be_cmds.c | 197 ++++++++++---------- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 3 files changed, 100 insertions(+), 101 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index e48b861e4ce15..270ff9aab3352 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -562,7 +562,7 @@ struct be_adapter { struct be_dma_mem mbox_mem_alloced; struct be_mcc_obj mcc_obj; - struct mutex mcc_lock; /* For serializing mcc cmds to BE card */ + spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */ spinlock_t mcc_cq_lock; u16 cfg_num_rx_irqs; /* configured via set-channels */ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 61adcebeef010..51b8377edd1d0 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -575,7 +575,7 @@ int be_process_mcc(struct be_adapter *adapter) /* Wait till no more pending mcc requests are present */ static int be_mcc_wait_compl(struct be_adapter *adapter) { -#define mcc_timeout 12000 /* 12s timeout */ +#define mcc_timeout 120000 /* 12s timeout */ int i, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; @@ -589,7 +589,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (atomic_read(&mcc_obj->q.used) == 0) break; - usleep_range(500, 1000); + udelay(100); } if 
(i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); @@ -866,7 +866,7 @@ static bool use_mcc(struct be_adapter *adapter) static int be_cmd_lock(struct be_adapter *adapter) { if (use_mcc(adapter)) { - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); return 0; } else { return mutex_lock_interruptible(&adapter->mbox_lock); @@ -877,7 +877,7 @@ static int be_cmd_lock(struct be_adapter *adapter) static void be_cmd_unlock(struct be_adapter *adapter) { if (use_mcc(adapter)) - return mutex_unlock(&adapter->mcc_lock); + return spin_unlock_bh(&adapter->mcc_lock); else return mutex_unlock(&adapter->mbox_lock); } @@ -1047,7 +1047,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_mac_query *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1076,7 +1076,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1088,7 +1088,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, struct be_cmd_req_pmac_add *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1113,7 +1113,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; @@ -1131,7 +1131,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) if (pmac_id == -1) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1151,7 +1151,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) status = be_mcc_notify_wait(adapter); err: - 
mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1414,7 +1414,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, struct be_dma_mem *q_mem = &rxq->dma_mem; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1444,7 +1444,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1508,7 +1508,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) struct be_cmd_req_q_destroy *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1525,7 +1525,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) q->created = false; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1593,7 +1593,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) struct be_cmd_req_hdr *hdr; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1621,7 +1621,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1637,7 +1637,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, CMD_SUBSYSTEM_ETH)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1660,7 +1660,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1697,7 +1697,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, struct 
be_cmd_req_link_status *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); if (link_status) *link_status = LINK_DOWN; @@ -1736,7 +1736,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1747,7 +1747,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) struct be_cmd_req_get_cntl_addnl_attribs *req; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1762,7 +1762,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1811,7 +1811,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) if (!get_fat_cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); while (total_size) { buf_size = min(total_size, (u32)60 * 1024); @@ -1849,9 +1849,9 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) log_offset += buf_size; } err: + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, get_fat_cmd.va, get_fat_cmd.dma); - mutex_unlock(&adapter->mcc_lock); return status; } @@ -1862,7 +1862,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) struct be_cmd_req_get_fw_version *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1885,7 +1885,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) sizeof(adapter->fw_on_flash)); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1899,7 +1899,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, struct be_cmd_req_modify_eq_delay *req; int status = 0, i; - 
mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1922,7 +1922,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1949,7 +1949,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, struct be_cmd_req_vlan_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1971,7 +1971,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1982,7 +1982,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) struct be_cmd_req_rx_filter *req = mem->va; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2015,7 +2015,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2046,7 +2046,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2066,7 +2066,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED) return -EOPNOTSUPP; @@ -2085,7 +2085,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) CMD_SUBSYSTEM_COMMON)) 
return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2108,7 +2108,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2189,7 +2189,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2214,7 +2214,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2226,7 +2226,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_enable_disable_beacon *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2247,7 +2247,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2258,7 +2258,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) struct be_cmd_req_get_beacon_state *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2282,7 +2282,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2306,7 +2306,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2328,7 +2328,7 
@@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, memcpy(data, resp->page_data + off, len); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2345,7 +2345,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, void *ctxt = NULL; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2387,7 +2387,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) @@ -2406,7 +2406,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2460,7 +2460,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2478,7 +2478,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2491,7 +2491,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, struct lancer_cmd_resp_read_object *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2525,7 +2525,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, } err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2537,7 +2537,7 @@ static int be_cmd_write_flashrom(struct be_adapter 
*adapter, struct be_cmd_write_flashrom *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2562,7 +2562,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(40000))) @@ -2573,7 +2573,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2584,7 +2584,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2611,7 +2611,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, memcpy(flashed_crc, req->crc, 4); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3217,7 +3217,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, struct be_cmd_req_acpi_wol_magic_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3234,7 +3234,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3249,7 +3249,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3272,7 +3272,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); 
+ spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) @@ -3281,7 +3281,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3298,7 +3298,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3324,7 +3324,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, if (status) goto err; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); wait_for_completion(&adapter->et_cmd_compl); resp = embedded_payload(wrb); @@ -3332,7 +3332,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, return status; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3348,7 +3348,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3382,7 +3382,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3393,7 +3393,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, struct be_cmd_req_seeprom_read *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3409,7 +3409,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3424,7 +3424,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) 
CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3469,7 +3469,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) } dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3479,7 +3479,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) struct be_cmd_req_set_qos *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3499,7 +3499,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3611,7 +3611,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, struct be_cmd_req_get_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3643,7 +3643,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3655,7 +3655,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, struct be_cmd_req_set_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3675,7 +3675,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3707,7 +3707,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = 
wrb_from_mccq(adapter); if (!wrb) { @@ -3771,7 +3771,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, } out: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; @@ -3831,7 +3831,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, if (!cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3853,7 +3853,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, err: dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3889,7 +3889,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3930,7 +3930,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3944,7 +3944,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, int status; u16 vid; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3991,7 +3991,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4190,7 +4190,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_set_ext_fat_caps *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4206,7 +4206,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter 
*adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4684,7 +4684,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) if (iface == 0xFFFFFFFF) return -1; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4701,7 +4701,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4735,7 +4735,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, struct be_cmd_resp_get_iface_list *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4756,7 +4756,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4850,7 +4850,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) if (BEx_chip(adapter)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4868,7 +4868,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) req->enable = 1; status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4941,7 +4941,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, u32 link_config = 0; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4969,7 +4969,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5000,8 +5000,7 @@ int 
be_cmd_set_features(struct be_adapter *adapter) struct be_mcc_wrb *wrb; int status; - if (mutex_lock_interruptible(&adapter->mcc_lock)) - return -1; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5039,7 +5038,7 @@ int be_cmd_set_features(struct be_adapter *adapter) dev_info(&adapter->pdev->dev, "Adapter does not support HW error recovery\n"); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5053,7 +5052,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, struct be_cmd_resp_hdr *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5076,7 +5075,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length); be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } EXPORT_SYMBOL(be_roce_mcc_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 875fe379eea21..3d2e215921191 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5667,8 +5667,8 @@ static int be_drv_init(struct be_adapter *adapter) } mutex_init(&adapter->mbox_lock); - mutex_init(&adapter->mcc_lock); mutex_init(&adapter->rx_filter_lock); + spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); init_completion(&adapter->et_cmd_compl); From c34424eb3be4c01db831428c0d7d483701ae820f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2025 16:45:34 -0800 Subject: [PATCH 184/503] net: dsa: rtl8366rb: don't prompt users for LED control Make NET_DSA_REALTEK_RTL8366RB_LEDS a hidden symbol. It seems very unlikely user would want to intentionally disable it. 
Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250228004534.3428681-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/realtek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 10687722d14c0..d6eb6713e5f6b 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -44,7 +44,7 @@ config NET_DSA_REALTEK_RTL8366RB Select to enable support for Realtek RTL8366RB. config NET_DSA_REALTEK_RTL8366RB_LEDS - bool "Support RTL8366RB LED control" + bool depends on (LEDS_CLASS=y || LEDS_CLASS=NET_DSA_REALTEK_RTL8366RB) depends on NET_DSA_REALTEK_RTL8366RB default NET_DSA_REALTEK_RTL8366RB From 1f860eb4cdda634589d75e78ff586d5dff20b8af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Mar 2025 11:05:36 +0100 Subject: [PATCH 185/503] wifi: nl80211: disable multi-link reconfiguration Both the APIs in cfg80211 and the implementation in mac80211 aren't really ready yet, we have a large number of fixes. In addition, it's not possible right now to discover support for this feature from userspace. Disable it for now, there's no rush. 
Link: https://patch.msgid.link/20250303110538.fbeef42a5687.Iab122c22137e5675ebd99f5c031e30c0e5c7af2e@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fdb2aac951d18..e87267fbb442e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16534,7 +16534,7 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) goto out; } - err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); + err = -EOPNOTSUPP; out: for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) From b7365eab39831487a84e63a9638209b68dc54008 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Fri, 28 Feb 2025 18:52:58 +0800 Subject: [PATCH 186/503] net: hns3: make sure ptp clock is unregister and freed if hclge_ptp_get_cycle returns an error During the initialization of ptp, hclge_ptp_get_cycle might return an error, in which case hclge_ptp_init returned directly without unregistering the clock and freeing it. To avoid that, call hclge_ptp_destroy_clock to unregister and free the clock if hclge_ptp_get_cycle fails.
Fixes: 8373cd38a888 ("net: hns3: change the method of obtaining default ptp cycle") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250228105258.1243461-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index bab16c2191b2f..181af419b878d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -483,7 +483,7 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_get_cycle(hdev); if (ret) - return ret; + goto out; } ret = hclge_ptp_int_en(hdev, true); From c7c1f3b05c67173f462d73d301d572b3f9e57e3b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 4 Mar 2025 13:31:47 +0200 Subject: [PATCH 187/503] usb: xhci: Fix host controllers "dying" after suspend and resume A recent cleanup went a bit too far and dropped clearing the cycle bit of link TRBs, so it stays different from the rest of the ring half of the time. Then a race occurs: if the xHC reaches such link TRB before more commands are queued, the link's cycle bit unintentionally matches the xHC's cycle so it follows the link and waits for further commands. If more commands are queued before the xHC gets there, inc_enq() flips the bit so the xHC later sees a mismatch and stops executing commands. This function is called before suspend and 50% of times after resuming the xHC is doomed to get stuck sooner or later. Then some Stop Endpoint command fails to complete in 5 seconds and this shows up xhci_hcd 0000:00:10.0: xHCI host not responding to stop endpoint command xhci_hcd 0000:00:10.0: xHCI host controller not responding, assume dead xhci_hcd 0000:00:10.0: HC died; cleaning up followed by loss of all USB devices on the affected bus.
That's if you are lucky, because if Set Deq gets stuck instead, the failure is silent. Likely responsible for kernel bug 219824. I found this while searching for possible causes of that regression and reproduced it locally before hearing back from the reporter. To repro, simply wait for link cycle to become set (debugfs), then suspend, resume and wait. To accelerate the failure I used a script which repeatedly starts and stops a UVC camera. Some HCs get fully reinitialized on resume and they are not affected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219824 Fixes: 36b972d4b7ce ("usb: xhci: improve xhci_clear_command_ring()") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250304113147.3322584-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 45653114ccd7f..1a90ebc8a30ea 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -780,8 +780,12 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) struct xhci_segment *seg; ring = xhci->cmd_ring; - xhci_for_each_ring_seg(ring->first_seg, seg) + xhci_for_each_ring_seg(ring->first_seg, seg) { + /* erase all TRBs before the link */ memset(seg->trbs, 0, sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1)); + /* clear link cycle bit */ + seg->trbs[TRBS_PER_SEGMENT - 1].link.control &= cpu_to_le32(~TRB_CYCLE); + } xhci_initialize_ring_info(ring); /* From 164b7dd4546b57c08b373e9e3cf315ff98cb032d Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Tue, 4 Mar 2025 14:05:04 +0000 Subject: [PATCH 188/503] ASoC: cs42l43: Add jack delay debounce after suspend Hardware reports jack absent after reset/suspension regardless of jack state, so introduce an additional delay only in suspension case to allow proper detection to take place after a short delay. 
Signed-off-by: Maciej Strozek Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250304140504.139245-1-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 13 ++++++++++--- sound/soc/codecs/cs42l43.c | 15 ++++++++++++++- sound/soc/codecs/cs42l43.h | 3 +++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index d9ab003e166bf..ac19a572fe70c 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -167,7 +167,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, autocontrol |= 0x3 << CS42L43_JACKDET_MODE_SHIFT; ret = cs42l43_find_index(priv, "cirrus,tip-fall-db-ms", 500, - NULL, cs42l43_accdet_db_ms, + &priv->tip_fall_db_ms, cs42l43_accdet_db_ms, ARRAY_SIZE(cs42l43_accdet_db_ms)); if (ret < 0) goto error; @@ -175,7 +175,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, tip_deb |= ret << CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT; ret = cs42l43_find_index(priv, "cirrus,tip-rise-db-ms", 500, - NULL, cs42l43_accdet_db_ms, + &priv->tip_rise_db_ms, cs42l43_accdet_db_ms, ARRAY_SIZE(cs42l43_accdet_db_ms)); if (ret < 0) goto error; @@ -764,6 +764,8 @@ void cs42l43_tip_sense_work(struct work_struct *work) error: mutex_unlock(&priv->jack_lock); + priv->suspend_jack_debounce = false; + pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); } @@ -771,14 +773,19 @@ void cs42l43_tip_sense_work(struct work_struct *work) irqreturn_t cs42l43_tip_sense(int irq, void *data) { struct cs42l43_codec *priv = data; + unsigned int db_delay = priv->tip_debounce_ms; cancel_delayed_work(&priv->bias_sense_timeout); cancel_delayed_work(&priv->tip_sense_work); cancel_delayed_work(&priv->button_press_work); cancel_work(&priv->button_release_work); + // Ensure delay after suspend is long enough to avoid false detection + if (priv->suspend_jack_debounce) + db_delay += priv->tip_fall_db_ms + 
priv->tip_rise_db_ms; + queue_delayed_work(system_long_wq, &priv->tip_sense_work, - msecs_to_jiffies(priv->tip_debounce_ms)); + msecs_to_jiffies(db_delay)); return IRQ_HANDLED; } diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index d2a2daefc2ec6..4257dbefe9dd1 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2402,9 +2402,22 @@ static int cs42l43_codec_runtime_resume(struct device *dev) return 0; } +static int cs42l43_codec_runtime_force_suspend(struct device *dev) +{ + struct cs42l43_codec *priv = dev_get_drvdata(dev); + + dev_dbg(priv->dev, "Runtime suspend\n"); + + priv->suspend_jack_debounce = true; + + pm_runtime_force_suspend(dev); + + return 0; +} + static const struct dev_pm_ops cs42l43_codec_pm_ops = { RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) }; static const struct platform_device_id cs42l43_codec_id_table[] = { diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9c144e129535f..1cd9d8a71c439 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -78,6 +78,8 @@ struct cs42l43_codec { bool use_ring_sense; unsigned int tip_debounce_ms; + unsigned int tip_fall_db_ms; + unsigned int tip_rise_db_ms; unsigned int bias_low; unsigned int bias_sense_ua; unsigned int bias_ramp_ms; @@ -95,6 +97,7 @@ struct cs42l43_codec { bool button_detect_running; bool jack_present; int jack_override; + bool suspend_jack_debounce; struct work_struct hp_ilimit_work; struct delayed_work hp_ilimit_clear_work; From d0bbe332669c5db32c8c92bc967f8e7f8d460ddf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Mar 2025 15:25:55 +0100 Subject: [PATCH 189/503] ALSA: hda: realtek: fix incorrect IS_REACHABLE() usage The alternative path leads to a build error after a recent change: sound/pci/hda/patch_realtek.c: In 
function 'alc233_fixup_lenovo_low_en_micmute_led': include/linux/stddef.h:9:14: error: called object is not a function or function pointer 9 | #define NULL ((void *)0) | ^ sound/pci/hda/patch_realtek.c:5041:49: note: in expansion of macro 'NULL' 5041 | #define alc233_fixup_lenovo_line2_mic_hotkey NULL | ^~~~ sound/pci/hda/patch_realtek.c:5063:9: note: in expansion of macro 'alc233_fixup_lenovo_line2_mic_hotkey' 5063 | alc233_fixup_lenovo_line2_mic_hotkey(codec, fix, action); Using IS_REACHABLE() is somewhat questionable here anyway since it leads to the input code not working when the HDA driver is builtin but input is in a loadable module. Replace this with a hard compile-time dependency on CONFIG_INPUT. In practice this won't change much other than solve the compiler error because it is rare to require sound output but no input support. Fixes: f603b159231b ("ALSA: hda/realtek - add supported Mic Mute LED for Lenovo platform") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20250304142620.582191-1-arnd@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 1 + sound/pci/hda/patch_realtek.c | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index e393578cbe684..84ebf19f28836 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -222,6 +222,7 @@ comment "Set to Y if you want auto-loading the side codec driver" config SND_HDA_CODEC_REALTEK tristate "Build Realtek HD-audio codec support" + depends on INPUT select SND_HDA_GENERIC select SND_HDA_GENERIC_LEDS select SND_HDA_SCODEC_COMPONENT diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ebf54ef5877a4..697a38e41e166 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4927,7 +4927,6 @@ static void alc298_fixup_samsung_amp_v2_4_amps(struct hda_codec *codec, alc298_samsung_v2_init_amps(codec, 4); } -#if IS_REACHABLE(CONFIG_INPUT) static void
gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) { @@ -5036,10 +5035,6 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, spec->kb_dev = NULL; } } -#else /* INPUT */ -#define alc280_fixup_hp_gpio2_mic_hotkey NULL -#define alc233_fixup_lenovo_line2_mic_hotkey NULL -#endif /* INPUT */ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) From 5623bc23a1cb9f9a9470fa73b3a20321dc4c4870 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Feb 2025 10:53:10 +0100 Subject: [PATCH 190/503] s390/traps: Fix test_monitor_call() inline assembly The test_monitor_call() inline assembly uses the xgr instruction, which also modifies the condition code, to clear a register. However the clobber list of the inline assembly does not specify that the condition code is modified, which may lead to incorrect code generation. Use the lhi instruction instead to clear the register without that the condition code is modified. Furthermore this limits clearing to the lower 32 bits of val, since its type is int. 
Fixes: 17248ea03674 ("s390: fix __EMIT_BUG() macro") Cc: stable@vger.kernel.org Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d8..b746213d3110c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -285,10 +285,10 @@ static void __init test_monitor_call(void) return; asm volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } From a22ee38d2efe18edc53791fd1036396c23b43ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 14:04:18 +0100 Subject: [PATCH 191/503] selftests/vDSO: Fix GNU hash table entry size for s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 14be4e6f3522 ("selftests: vDSO: fix ELF hash table entry size for s390x") changed the type of the ELF hash table entries to 64bit on s390x. However the *GNU* hash tables entries are always 32bit. The "bucket" pointer is shared between both hash algorithms. On s390, this caused the GNU hash algorithm to access its 32-bit entries as if they were 64-bit, triggering compiler warnings (assignment between "Elf64_Xword *" and "Elf64_Word *") and runtime crashes. Introduce a new dedicated "gnu_bucket" pointer which is used by the GNU hash. 
Fixes: e0746bde6f82 ("selftests/vDSO: support DT_GNU_HASH") Reviewed-by: Jens Remus Signed-off-by: Thomas Weißschuh Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20250217-selftests-vdso-s390-gnu-hash-v2-1-f6c2532ffe2a@linutronix.de Signed-off-by: Vasily Gorbik --- tools/testing/selftests/vDSO/parse_vdso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 2fe5e983cb22f..f89d052c730eb 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -53,7 +53,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *gnu_hash; + ELF(Word) *gnu_hash, *gnu_bucket; ELF_HASH_ENTRY *bucket, *chain; ELF_HASH_ENTRY nbucket, nchain; @@ -185,8 +185,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* The bucket array is located after the header (4 uint32) and the bloom * filter (size_t array of gnu_hash[2] elements). 
*/ - vdso_info.bucket = vdso_info.gnu_hash + 4 + - sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; } else { vdso_info.nbucket = hash[0]; vdso_info.nchain = hash[1]; @@ -268,11 +268,11 @@ void *vdso_sym(const char *version, const char *name) if (vdso_info.gnu_hash) { uint32_t h1 = gnu_hash(name), h2, *hashval; - i = vdso_info.bucket[h1 % vdso_info.nbucket]; + i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket]; if (i == 0) return 0; h1 |= 1; - hashval = vdso_info.bucket + vdso_info.nbucket + + hashval = vdso_info.gnu_bucket + vdso_info.nbucket + (i - vdso_info.gnu_hash[1]); for (;; i++) { ELF(Sym) *sym = &vdso_info.symtab[i]; From b4a1dec11793936ffe1a9fb811724532ff3b1174 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 24 Feb 2025 12:27:04 +0100 Subject: [PATCH 192/503] s390/ftrace: Fix return address recovery of traced function When fgraph is enabled the traced function return address is replaced with trampoline return_to_handler(). The original return address of the traced function is saved in per task return stack along with a stack pointer for reliable stack unwinding via function_graph_enter_regs(). During stack unwinding e.g. for livepatching, ftrace_graph_ret_addr() identifies the original return address of the traced function with the saved stack pointer. With a recent change, the stack pointers passed to ftrace_graph_ret_addr() and function_graph_enter_regs() do not match anymore, and therefore the original return address is not found. Pass the correct stack pointer to function_graph_enter_regs() to fix this. 
Fixes: 7495e179b478 ("s390/tracing: Enable HAVE_FTRACE_GRAPH_FUNC") Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 63ba6306632ef..e540b022ceb23 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -266,12 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; if (unlikely(ftrace_graph_is_dead())) return; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; - if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) *parent = (unsigned long)&return_to_handler; } From f9dc8fb3afc968042bdaf4b6e445a9272071c9f3 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 4 Mar 2025 03:23:14 -0500 Subject: [PATCH 193/503] KVM: x86: Explicitly zero EAX and EBX when PERFMON_V2 isn't supported by KVM Fix a goof where KVM sets CPUID.0x80000022.EAX to CPUID.0x80000022.EBX instead of zeroing both when PERFMON_V2 isn't supported by KVM. In practice, barring a buggy CPU (or vCPU model when running nested) only the !enable_pmu case is affected, as KVM always supports PERFMON_V2 if it's available in hardware, i.e. CPUID.0x80000022.EBX will be '0' if PERFMON_V2 is unsupported. For the !enable_pmu case, the bug is relatively benign as KVM will refuse to enable PMU capabilities, but a VMM that reflects KVM's supported CPUID into the guest could inadvertently induce #GPs in the guest due to advertising support for MSRs that KVM refuses to emulate.
Fixes: 94cdeebd8211 ("KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022") Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250304082314.472202-3-xiaoyao.li@intel.com [sean: massage shortlog and changelog, tag for stable] Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8eb3a88707f21..121edf1f2a79a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1763,7 +1763,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { - entry->eax = entry->ebx; + entry->eax = entry->ebx = 0; break; } From 3d252160b818045f3a152b13756f6f37ca34639d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Mar 2025 13:51:38 +0000 Subject: [PATCH 194/503] fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex pipe_readable(), pipe_writable(), and pipe_poll() can read "pipe->head" and "pipe->tail" outside of "pipe->mutex" critical section. When the head and the tail are read individually in that order, there is a window for interruption between the two reads in which both the head and the tail can be updated by concurrent readers and writers. One of the problematic scenarios observed with hackbench running multiple groups on a large server on a particular pipe inode is as follows: pipe->head = 36 pipe->tail = 36 hackbench-118762 [057] ..... 1029.550548: pipe_write: *wakes up: pipe not full* hackbench-118762 [057] ..... 1029.550548: pipe_write: head: 36 -> 37 [tail: 36] hackbench-118762 [057] ..... 1029.550548: pipe_write: *wake up next reader 118740* hackbench-118762 [057] ..... 1029.550548: pipe_write: *wake up next writer 118768* hackbench-118768 [206] ..... 1029.55055X: pipe_write: *writer wakes up* hackbench-118768 [206] ..... 
1029.55055X: pipe_write: head = READ_ONCE(pipe->head) [37] ... CPU 206 interrupted (exact wakeup was not traced but 118768 did read head at 37 in traces) hackbench-118740 [057] ..... 1029.550558: pipe_read: *reader wakes up: pipe is not empty* hackbench-118740 [057] ..... 1029.550558: pipe_read: tail: 36 -> 37 [head = 37] hackbench-118740 [057] ..... 1029.550559: pipe_read: *pipe is empty; wakeup writer 118768* hackbench-118740 [057] ..... 1029.550559: pipe_read: *sleeps* hackbench-118766 [185] ..... 1029.550592: pipe_write: *New writer comes in* hackbench-118766 [185] ..... 1029.550592: pipe_write: head: 37 -> 38 [tail: 37] hackbench-118766 [185] ..... 1029.550592: pipe_write: *wakes up reader 118766* hackbench-118740 [185] ..... 1029.550598: pipe_read: *reader wakes up; pipe not empty* hackbench-118740 [185] ..... 1029.550599: pipe_read: tail: 37 -> 38 [head: 38] hackbench-118740 [185] ..... 1029.550599: pipe_read: *pipe is empty* hackbench-118740 [185] ..... 1029.550599: pipe_read: *reader sleeps; wakeup writer 118768* ... CPU 206 switches back to writer hackbench-118768 [206] ..... 1029.550601: pipe_write: tail = READ_ONCE(pipe->tail) [38] hackbench-118768 [206] ..... 1029.550601: pipe_write: pipe_full()? (u32)(37 - 38) >= 16? Yes hackbench-118768 [206] ..... 1029.550601: pipe_write: *writer goes back to sleep* [ Tasks 118740 and 118768 can then indefinitely wait on each other. ] The unsigned arithmetic in pipe_occupancy() wraps around when "pipe->tail > pipe->head" leading to pipe_full() returning true despite the pipe being empty. The case of genuine wraparound of "pipe->head" is handled since pipe buffer has data allowing readers to make progress until the pipe->tail wraps too after which the reader will wakeup a sleeping writer, however, mistaking the pipe to be full when it is in fact empty can lead to readers and writers waiting on each other indefinitely. 
This issue became more problematic and surfaced as a hang in hackbench after the optimization in commit aaec5a95d596 ("pipe_read: don't wake up the writer if the pipe is still full") significantly reduced the number of spurious wakeups of writers that had previously helped mask the issue. To avoid missing any updates between the reads of "pipe->head" and "pipe->tail", unionize the two with a single unsigned long "pipe->head_tail" member that can be loaded atomically. Using "pipe->head_tail" to read the head and the tail ensures the lockless checks do not miss any updates to the head or the tail and since those two are only updated under "pipe->mutex", it ensures that the head is always ahead of, or equal to the tail resulting in correct calculations. [ prateek: commit log, testing on x86 platforms. ] Reported-and-debugged-by: Swapnil Sapkal Closes: https://lore.kernel.org/lkml/e813814e-7094-4673-bc69-731af065a0eb@amd.com/ Reported-by: Alexey Gladkov Closes: https://lore.kernel.org/all/Z8Wn0nTvevLRG_4m@example.org/ Fixes: 8cefc107ca54 ("pipe: Use head and tail pointers for the ring, not cursor and length") Tested-by: Swapnil Sapkal Reviewed-by: Oleg Nesterov Tested-by: Alexey Gladkov Signed-off-by: K Prateek Nayak Signed-off-by: Linus Torvalds --- fs/pipe.c | 19 ++++++++----------- include/linux/pipe_fs_i.h | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index ce1af7592780d..e8e6698f36981 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -210,11 +210,10 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_readable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int writers = READ_ONCE(pipe->writers); - return
!pipe_empty(head, tail) || !writers; + return !pipe_empty(idx.head, idx.tail) || !writers; } static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe, @@ -417,11 +416,10 @@ static inline int is_packetized(struct file *file) /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_writable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int max_usage = READ_ONCE(pipe->max_usage); - return !pipe_full(head, tail, max_usage) || + return !pipe_full(idx.head, idx.tail, max_usage) || !READ_ONCE(pipe->readers); } @@ -659,7 +657,7 @@ pipe_poll(struct file *filp, poll_table *wait) { __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - unsigned int head, tail; + union pipe_index idx; /* Epoll has some historical nasty semantics, this enables them */ WRITE_ONCE(pipe->poll_usage, true); @@ -680,19 +678,18 @@ pipe_poll(struct file *filp, poll_table *wait) * if something changes and you got it wrong, the poll * table entry will wake you up and fix it. 
*/ - head = READ_ONCE(pipe->head); - tail = READ_ONCE(pipe->tail); + idx.head_tail = READ_ONCE(pipe->head_tail); mask = 0; if (filp->f_mode & FMODE_READ) { - if (!pipe_empty(head, tail)) + if (!pipe_empty(idx.head, idx.tail)) mask |= EPOLLIN | EPOLLRDNORM; if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } if (filp->f_mode & FMODE_WRITE) { - if (!pipe_full(head, tail, pipe->max_usage)) + if (!pipe_full(idx.head, idx.tail, pipe->max_usage)) mask |= EPOLLOUT | EPOLLWRNORM; /* * Most Unices do not set EPOLLERR for FIFOs but on Linux they diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a8197..3cc4f8eab853f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -31,6 +31,33 @@ struct pipe_buffer { unsigned long private; }; +/* + * Really only alpha needs 32-bit fields, but + * might as well do it for 64-bit architectures + * since that's what we've historically done, + * and it makes 'head_tail' always be a simple + * 'unsigned long'. + */ +#ifdef CONFIG_64BIT +typedef unsigned int pipe_index_t; +#else +typedef unsigned short pipe_index_t; +#endif + +/* + * We have to declare this outside 'struct pipe_inode_info', + * but then we can't use 'union pipe_index' for an anonymous + * union, so we end up having to duplicate this declaration + * below. Annoying. 
+ */ +union pipe_index { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; +}; + /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing @@ -58,8 +85,16 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - unsigned int head; - unsigned int tail; + + /* This has to match the 'union pipe_index' above */ + union { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; + }; + unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; From dfc1b168a8c4b376fa222b27b97c2c4ad4b786e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 08:27:54 +0100 Subject: [PATCH 195/503] kbuild: userprogs: use correct lld when linking through clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userprog infrastructure links objects files through $(CC). Either explicitly by manually calling $(CC) on multiple object files or implicitly by directly compiling a source file to an executable. The documentation at Documentation/kbuild/llvm.rst indicates that ld.lld would be used for linking if LLVM=1 is specified. However clang instead will use either a globally installed cross linker from $PATH called ${target}-ld or fall back to the system linker, which probably does not support crosslinking. For the normal kernel build this is not an issue because the linker is always executed directly, without the compiler being involved. Explicitly pass --ld-path to clang so $(LD) is respected. As clang 13.0.1 is required to build the kernel, this option is available. 
Fixes: 7f3a59db274c ("kbuild: add infrastructure to build userspace programs") Cc: stable@vger.kernel.org # needs wrapping in $(cc-option) for < 6.9 Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 70bdbf2218fc5..6a8e5be6b0043 100644 --- a/Makefile +++ b/Makefile @@ -1123,6 +1123,11 @@ endif KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +# userspace programs are linked via the compiler, use the correct linker +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +KBUILD_USERLDFLAGS += --ld-path=$(LD) +endif + # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) From 02e9a22ceef0227175e391902d8760425fa072c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 11:00:31 +0100 Subject: [PATCH 196/503] kbuild: hdrcheck: fix cross build with clang The headercheck tries to call clang with a mix of compiler arguments that don't include the target architecture. When building e.g. x86 headers on arm64, this produces a warning like clang: warning: unknown platform, assuming -mfloat-abi=soft Add in the KBUILD_CPPFLAGS, which contain the target, in order to make it build properly. See also 1b71c2fb04e7 ("kbuild: userprogs: fix bitsize and target detection on clang"). Reviewed-by: Nathan Chancellor Fixes: feb843a469fb ("kbuild: add $(CLANG_FLAGS) to KBUILD_CPPFLAGS") Signed-off-by: Arnd Bergmann --- usr/include/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr/include/Makefile b/usr/include/Makefile index 6c6de1b1622b1..e3d6b03527fec 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -10,7 +10,7 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # In theory, we do not care -m32 or -m64 for header compile tests. 
# It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. -UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # USERCFLAGS might contain sysroot location for CC. UAPI_CFLAGS += $(USERCFLAGS) From dd0b7d4a56e3349de65bf9752734510fb55baf29 Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Fri, 28 Feb 2025 20:37:08 +0900 Subject: [PATCH 197/503] docs: Kconfig: fix defconfig description Commit 2a86f6612164 ("kbuild: use KBUILD_DEFCONFIG as the fallback for DEFCONFIG_LIST") removed arch/$ARCH/defconfig; however, the document has not been updated to reflect this change yet. Signed-off-by: Satoru Takeuchi Signed-off-by: Masahiro Yamada --- Documentation/admin-guide/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index eb94526689091..b557cf1c820d2 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -176,7 +176,7 @@ Configuring the kernel values without prompting. "make defconfig" Create a ./.config file by using the default - symbol values from either arch/$ARCH/defconfig + symbol values from either arch/$ARCH/configs/defconfig or arch/$ARCH/configs/${PLATFORM}_defconfig, depending on the architecture. From 30bfc151f0c1ec80c27a80a7651b2c15c648ad16 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 10 Dec 2024 09:31:02 +0100 Subject: [PATCH 198/503] drm/xe: Remove double pageflip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is already handled below in the code by fixup_initial_plane_config. 
Fixes: a8153627520a ("drm/i915: Try to relocate the BIOS fb to the start of ggtt") Cc: Ville Syrjälä Reviewed-by: Vinod Govindapillai Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241210083111.230484-3-dev@lankhorst.se Signed-off-by: Maarten Lankhorst (cherry picked from commit 2218704997979fbf11765281ef752f07c5cf25bb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_plane_initial.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 2eb9633f163a7..2a2f250fa495d 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -194,8 +194,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, to_intel_plane(crtc->base.primary); struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -241,14 +239,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); plane_config->vma = vma; - - /* - * Flip to the newly created mapping ASAP, so we can re-use the - * first part of GGTT for WOPCM, prevent flickering, and prevent - * the lookup of sysmem scratch pages. - */ - plane->check_plane(crtc_state, plane_state); - plane->async_flip(NULL, plane, crtc_state, plane_state, true); return; nofb: From d1039a3c12fffe501c5379c7eb1372eaab318e0a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 26 Feb 2025 15:56:26 +0200 Subject: [PATCH 199/503] drm/i915/mst: update max stream count to match number of pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We create the stream encoders and attach connectors for each pipe we have. 
As the number of pipes has increased, we've failed to update the topology manager maximum number of payloads to match that. Bump up the max stream count to match number of pipes, enabling the fourth stream on platforms that support four pipes. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Ville Syrjala Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250226135626.1956012-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 15bccbfb78d63a2a621b30caff8b9424160c6c89) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a65cf97ad12df..86d6185fda50a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1867,7 +1867,8 @@ intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id) /* create encoders */ mst_stream_encoders_create(dig_port); ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, display->drm, - &intel_dp->aux, 16, 3, conn_base_id); + &intel_dp->aux, 16, + INTEL_NUM_PIPES(display), conn_base_id); if (ret) { intel_dp->mst_mgr.cbs = NULL; return ret; From 475d06e00b7496c7915d87f7ae67af26738e4649 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 26 Feb 2025 17:47:49 +0000 Subject: [PATCH 200/503] drm/xe/userptr: properly setup pfn_flags_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we just leave it uninitialised, which at first looks harmless, however we also don't zero out the pfn array, and with pfn_flags_mask the idea is to be able to set individual flags for a given range of pfn or completely ignore them, outside of default_flags.
So here we end up with pfn[i] & pfn_flags_mask, and if both are uninitialised we might get back an unexpected flags value, like asking for read only with default_flags, but getting back write on top, leading to potentially bogus behaviour. To fix this ensure we zero the pfn_flags_mask, such that hmm only considers the default_flags and not also the initial pfn[i] value. v2 (Thomas): - Prefer proper initializer. Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Cc: stable@vger.kernel.org # v6.10+ Reviewed-by: Thomas Hellström Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250226174748.294285-2-matthew.auld@intel.com (cherry picked from commit dd8c01e42f4c5c1eaf02f003d7d588ba6706aa71) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 0898344678801..2e4ae61567d8d 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -166,13 +166,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, { unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + unsigned long *pfns; struct xe_userptr *userptr; struct xe_vma *vma = &uvma->vma; u64 userptr_start = xe_vma_userptr(vma); u64 userptr_end = userptr_start + xe_vma_size(vma); struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range; + struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; bool write = !xe_vma_read_only(vma); unsigned long notifier_seq; u64 npages; @@ -199,19 +206,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, return -ENOMEM; if (write) - flags |=
HMM_PFN_REQ_WRITE; + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto free_pfns; } - hmm_range.default_flags = flags; hmm_range.hmm_pfns = pfns; - hmm_range.notifier = &userptr->notifier; - hmm_range.start = userptr_start; - hmm_range.end = userptr_end; - hmm_range.dev_private_owner = vm->xe; while (true) { hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); From 54f94dc7f6b4db45dbc23b4db3d20c7194e2c54f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 27 Feb 2025 10:13:00 +0000 Subject: [PATCH 201/503] drm/xe: Fix GT "for each engine" workarounds Any rules using engine matching are currently broken due to RTP processing happening too early in init, before the list of hardware engines has been initialised. Fix this by moving workaround processing to later in the driver probe sequence, to just before the processed list is used for the first time. Looking at the debugfs gt0/workarounds on ADL-P we notice 14011060649 should be present while we see, before: GT Workarounds 14011059788 14015795083 And with the patch: GT Workarounds 14011060649 14011059788 14015795083 Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Cc: Matt Roper Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-2-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi (cherry picked from commit 25d434cef791e03cf40680f5441b576c639bfa84) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5d6fb79957b63..9f4f27d1ef4a9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -380,9 +380,7 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_gt(gt); xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt));
spin_lock_init(&gt->global_invl_lock); @@ -474,6 +472,8 @@ static int all_fw_domain_init(struct xe_gt *gt) } xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(&gt->reg_sr, gt); err = xe_gt_clock_init(gt); From 778b94d7ac17b5800aa857222911f09cc986b509 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:53 -0600 Subject: [PATCH 202/503] ACPI: platform_profile: Add support for hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two drivers don't support all the same profiles the legacy interface only exports the common profiles. This causes problems for cases where one driver uses low-power but another uses quiet because the result is that neither is exported to sysfs. To allow two drivers to disagree, add support for "hidden choices". Hidden choices are platform profiles that a driver supports to be compatible with the platform profile of another driver. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Reported-by: Antheas Kapenekakis Closes: https://lore.kernel.org/platform-driver-x86/e64b771e-3255-42ad-9257-5b8fc6c24ac9@gmx.de/T/#mc068042dd29df36c16c8af92664860fc4763974b Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-2-superm1@kernel.org Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/platform_profile.c | 94 +++++++++++++++++++++++++------- include/linux/platform_profile.h | 3 + 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index 2ad53cc6aae53..ef9444482db19 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -21,9 +21,15 @@ struct platform_profile_handler { struct device dev; int minor; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + unsigned long hidden_choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; const struct platform_profile_ops *ops; }; +struct aggregate_choices_data { + unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + int count; +}; + static const char * const profile_names[] = { [PLATFORM_PROFILE_LOW_POWER] = "low-power", [PLATFORM_PROFILE_COOL] = "cool", @@ -73,7 +79,7 @@ static int _store_class_profile(struct device *dev, void *data) lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (!test_bit(*bit, handler->choices)) + if (!test_bit(*bit, handler->choices) && !test_bit(*bit, handler->hidden_choices)) return -EOPNOTSUPP; return handler->ops->profile_set(dev, *bit); @@ -239,21 +245,44 @@ static const struct class platform_profile_class = { /** * _aggregate_choices - Aggregate the available profile choices * @dev: The device - * @data: The available profile choices + * @arg: struct aggregate_choices_data * * Return: 0 on success, -errno on failure */ -static int _aggregate_choices(struct device *dev, void *data) +static int _aggregate_choices(struct device *dev, void *arg) { + unsigned long tmp[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data *data = arg; struct platform_profile_handler *handler; - unsigned long *aggregate = data; lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (test_bit(PLATFORM_PROFILE_LAST, aggregate)) - bitmap_copy(aggregate, handler->choices, PLATFORM_PROFILE_LAST); + 
bitmap_or(tmp, handler->choices, handler->hidden_choices, PLATFORM_PROFILE_LAST); + if (test_bit(PLATFORM_PROFILE_LAST, data->aggregate)) + bitmap_copy(data->aggregate, tmp, PLATFORM_PROFILE_LAST); else - bitmap_and(aggregate, handler->choices, aggregate, PLATFORM_PROFILE_LAST); + bitmap_and(data->aggregate, tmp, data->aggregate, PLATFORM_PROFILE_LAST); + data->count++; + + return 0; +} + +/** + * _remove_hidden_choices - Remove hidden choices from aggregate data + * @dev: The device + * @arg: struct aggregate_choices_data + * + * Return: 0 on success, -errno on failure + */ +static int _remove_hidden_choices(struct device *dev, void *arg) +{ + struct aggregate_choices_data *data = arg; + struct platform_profile_handler *handler; + + lockdep_assert_held(&profile_lock); + handler = to_pprof_handler(dev); + bitmap_andnot(data->aggregate, handler->choices, + handler->hidden_choices, PLATFORM_PROFILE_LAST); return 0; } @@ -270,22 +299,31 @@ static ssize_t platform_profile_choices_show(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... 
BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int err; - set_bit(PLATFORM_PROFILE_LAST, aggregate); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, - aggregate, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; + if (data.count == 1) { + err = class_for_each_device(&platform_profile_class, NULL, + &data, _remove_hidden_choices); + if (err) + return err; + } } /* no profile handler registered any more */ - if (bitmap_empty(aggregate, PLATFORM_PROFILE_LAST)) + if (bitmap_empty(data.aggregate, PLATFORM_PROFILE_LAST)) return -EINVAL; - return _commmon_choices_show(aggregate, buf); + return _commmon_choices_show(data.aggregate, buf); } /** @@ -373,7 +411,10 @@ static ssize_t platform_profile_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int ret; int i; @@ -381,13 +422,13 @@ static ssize_t platform_profile_store(struct device *dev, i = sysfs_match_string(profile_names, buf); if (i < 0 || i == PLATFORM_PROFILE_CUSTOM) return -EINVAL; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { ret = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (ret) return ret; - if (!test_bit(i, choices)) + if (!test_bit(i, data.aggregate)) return -EOPNOTSUPP; ret = class_for_each_device(&platform_profile_class, NULL, &i, @@ -453,12 +494,15 @@ EXPORT_SYMBOL_GPL(platform_profile_notify); */ int platform_profile_cycle(void) { + struct aggregate_choices_data data = { + .aggregate = { [0 ... 
BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; enum platform_profile_option next = PLATFORM_PROFILE_LAST; enum platform_profile_option profile = PLATFORM_PROFILE_LAST; - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int err; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, &profile, _aggregate_profiles); @@ -470,14 +514,14 @@ int platform_profile_cycle(void) return -EINVAL; err = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; /* never iterate into a custom if all drivers supported it */ - clear_bit(PLATFORM_PROFILE_CUSTOM, choices); + clear_bit(PLATFORM_PROFILE_CUSTOM, data.aggregate); - next = find_next_bit_wrap(choices, + next = find_next_bit_wrap(data.aggregate, PLATFORM_PROFILE_LAST, profile + 1); @@ -532,6 +576,14 @@ struct device *platform_profile_register(struct device *dev, const char *name, return ERR_PTR(-EINVAL); } + if (ops->hidden_choices) { + err = ops->hidden_choices(drvdata, pprof->hidden_choices); + if (err) { + dev_err(dev, "platform_profile hidden_choices failed\n"); + return ERR_PTR(err); + } + } + guard(mutex)(&profile_lock); /* create class interface for individual handler */ diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index 8ab5b0e8eb2c1..8c9df7dadd5d3 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -33,6 +33,8 @@ enum platform_profile_option { * @probe: Callback to setup choices available to the new class device. These * choices will only be enforced when setting a new profile, not when * getting the current one. + * @hidden_choices: Callback to setup choices that are not visible to the user + * but can be set by the driver. 
* @profile_get: Callback that will be called when showing the current platform * profile in sysfs. * @profile_set: Callback that will be called when storing a new platform @@ -40,6 +42,7 @@ enum platform_profile_option { */ struct platform_profile_ops { int (*probe)(void *drvdata, unsigned long *choices); + int (*hidden_choices)(void *drvdata, unsigned long *choices); int (*profile_get)(struct device *dev, enum platform_profile_option *profile); int (*profile_set)(struct device *dev, enum platform_profile_option profile); }; From 44e94fece5170ed9110564efec592d0e88830a28 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:54 -0600 Subject: [PATCH 203/503] platform/x86/amd: pmf: Add 'quiet' to hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When amd-pmf and asus-wmi are both bound no low power option shows up in sysfs. Add a hidden choice for amd-pmf to support 'quiet' mode to let both bind. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Suggested-by: Antheas Kapenekakis Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-3-superm1@kernel.org Signed-off-by: Rafael J. 
Wysocki --- drivers/platform/x86/amd/pmf/sps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index e6cf0b22dac33..3a0079c17cb17 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -303,6 +303,7 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) mode = POWER_MODE_BALANCED_POWER; break; case PLATFORM_PROFILE_LOW_POWER: + case PLATFORM_PROFILE_QUIET: mode = POWER_MODE_POWER_SAVER; break; default: @@ -387,6 +388,13 @@ static int amd_pmf_profile_set(struct device *dev, return 0; } +static int amd_pmf_hidden_choices(void *drvdata, unsigned long *choices) +{ + set_bit(PLATFORM_PROFILE_QUIET, choices); + + return 0; +} + static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) { set_bit(PLATFORM_PROFILE_LOW_POWER, choices); @@ -398,6 +406,7 @@ static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) static const struct platform_profile_ops amd_pmf_profile_ops = { .probe = amd_pmf_profile_probe, + .hidden_choices = amd_pmf_hidden_choices, .profile_get = amd_pmf_profile_get, .profile_set = amd_pmf_profile_set, }; From 9a43102daf64dd0d172d8b39836dbc1dba4da1ea Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:55 -0600 Subject: [PATCH 204/503] platform/x86/amd: pmf: Add balanced-performance to hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acer's WMI driver uses balanced-performance but AMD-PMF doesn't. In case a machine binds with both drivers let amd-pmf use balanced-performance as well. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Suggested-by: Antheas Kapenekakis Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-4-superm1@kernel.org Signed-off-by: Rafael J. 
Wysocki --- drivers/platform/x86/amd/pmf/sps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index 3a0079c17cb17..d3083383f11fb 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -297,6 +297,7 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) switch (pmf->current_profile) { case PLATFORM_PROFILE_PERFORMANCE: + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: mode = POWER_MODE_PERFORMANCE; break; case PLATFORM_PROFILE_BALANCED: @@ -391,6 +392,7 @@ static int amd_pmf_profile_set(struct device *dev, static int amd_pmf_hidden_choices(void *drvdata, unsigned long *choices) { set_bit(PLATFORM_PROFILE_QUIET, choices); + set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, choices); return 0; } From 723aa55c08c9d1e0734e39a815fd41272eac8269 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Fri, 14 Feb 2025 19:04:18 +0800 Subject: [PATCH 205/503] HID: i2c-hid: improve i2c_hid_get_report error message We have two places to print "failed to set a report to ...", use "get a report from" instead of "set a report to", it makes people who knows less about the module to know where the error happened. 
Before: i2c_hid_acpi i2c-FTSC1000:00: failed to set a report to device: -11 After: i2c_hid_acpi i2c-FTSC1000:00: failed to get a report from device: -11 Signed-off-by: Wentao Guan Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 75544448c2393..d3912e3f2f13a 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -290,7 +290,7 @@ static int i2c_hid_get_report(struct i2c_hid *ihid, ihid->rawbuf, recv_len + sizeof(__le16)); if (error) { dev_err(&ihid->client->dev, - "failed to set a report to device: %d\n", error); + "failed to get a report from device: %d\n", error); return error; } From 221cea1003d8a412e5ec64a58df7ab19b654f490 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Sun, 23 Feb 2025 22:36:30 -0700 Subject: [PATCH 206/503] HID: apple: disable Fn key handling on the Omoton KB066 Remove the fixup to make the Omoton KB066's F6 key F6 when not holding Fn. That was really just a hack to allow typing F6 in fnmode>0, and it didn't fix any of the other F keys that were likewise untypable in fnmode>0. Instead, because the Omoton's Fn key is entirely internal to the keyboard, completely disable Fn key translation when an Omoton is detected, which will prevent the hid-apple driver from interfering with the keyboard's built-in Fn key handling. All of the F keys, including F6, are then typable when Fn is held. The Omoton KB066 and the Apple A1255 both have HID product code 05ac:022c. The self-reported name of every original A1255 when they left the factory was "Apple Wireless Keyboard". By default, Mac OS changes the name to "<username>'s keyboard" when pairing with the keyboard, but Mac OS allows the user to set the internal name of Apple keyboards to anything they like. The Omoton KB066's name, on the other hand, is not configurable: It is always "Bluetooth Keyboard".
Because that name is so generic that a user might conceivably use the same name for a real Apple keyboard, detect Omoton keyboards based on both having that exact name and having HID product code 022c. Fixes: 819083cb6eed ("HID: apple: fix up the F6 key on the Omoton KB066 keyboard") Signed-off-by: Alex Henrie Reviewed-by: Aditya Garg Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 49812a76b7edd..d900dd05c335c 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -378,6 +378,12 @@ static bool apple_is_non_apple_keyboard(struct hid_device *hdev) return false; } +static bool apple_is_omoton_kb066(struct hid_device *hdev) +{ + return hdev->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI && + strcmp(hdev->name, "Bluetooth Keyboard") == 0; +} + static inline void apple_setup_key_translation(struct input_dev *input, const struct apple_key_translation *table) { @@ -546,9 +552,6 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } - if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ - code = KEY_F6; - if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); @@ -728,7 +731,7 @@ static int apple_input_configured(struct hid_device *hdev, { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { + if (((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) || apple_is_omoton_kb066(hdev)) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); asc->quirks &= ~APPLE_HAS_FN; } From 2ff5baa9b5275e3acafdf7f2089f74cccb2f38d1 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 24 Feb 2025 20:30:30 +0300 Subject: [PATCH 207/503] HID: appleir: Fix potential NULL dereference at raw event handle Syzkaller reports a NULL pointer dereference issue in input_event(). 
BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:68 [inline] BUG: KASAN: null-ptr-deref in _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] BUG: KASAN: null-ptr-deref in is_event_supported drivers/input/input.c:67 [inline] BUG: KASAN: null-ptr-deref in input_event+0x42/0xa0 drivers/input/input.c:395 Read of size 8 at addr 0000000000000028 by task syz-executor199/2949 CPU: 0 UID: 0 PID: 2949 Comm: syz-executor199 Not tainted 6.13.0-rc4-syzkaller-00076-gf097a36ef88d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 kasan_report+0xd9/0x110 mm/kasan/report.c:602 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 instrument_atomic_read include/linux/instrumented.h:68 [inline] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] is_event_supported drivers/input/input.c:67 [inline] input_event+0x42/0xa0 drivers/input/input.c:395 input_report_key include/linux/input.h:439 [inline] key_down drivers/hid/hid-appleir.c:159 [inline] appleir_raw_event+0x3e5/0x5e0 drivers/hid/hid-appleir.c:232 __hid_input_report.constprop.0+0x312/0x440 drivers/hid/hid-core.c:2111 hid_ctrl+0x49f/0x550 drivers/hid/usbhid/hid-core.c:484 __usb_hcd_giveback_urb+0x389/0x6e0 drivers/usb/core/hcd.c:1650 usb_hcd_giveback_urb+0x396/0x450 drivers/usb/core/hcd.c:1734 dummy_timer+0x17f7/0x3960 drivers/usb/gadget/udc/dummy_hcd.c:1993 __run_hrtimer kernel/time/hrtimer.c:1739 [inline] __hrtimer_run_queues+0x20a/0xae0 kernel/time/hrtimer.c:1803 hrtimer_run_softirq+0x17d/0x350 kernel/time/hrtimer.c:1820 handle_softirqs+0x206/0x8d0 kernel/softirq.c:561 __do_softirq kernel/softirq.c:595 [inline] invoke_softirq kernel/softirq.c:435 [inline] __irq_exit_rcu+0xfa/0x160 kernel/softirq.c:662 irq_exit_rcu+0x9/0x30 kernel/softirq.c:678 
instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1049 [inline] sysvec_apic_timer_interrupt+0x90/0xb0 arch/x86/kernel/apic/apic.c:1049 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 __mod_timer+0x8f6/0xdc0 kernel/time/timer.c:1185 add_timer+0x62/0x90 kernel/time/timer.c:1295 schedule_timeout+0x11f/0x280 kernel/time/sleep_timeout.c:98 usbhid_wait_io+0x1c7/0x380 drivers/hid/usbhid/hid-core.c:645 usbhid_init_reports+0x19f/0x390 drivers/hid/usbhid/hid-core.c:784 hiddev_ioctl+0x1133/0x15b0 drivers/hid/usbhid/hiddev.c:794 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl fs/ioctl.c:892 [inline] __x64_sys_ioctl+0x190/0x200 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f This happens due to the malformed report items sent by the emulated device which results in a report, that has no fields, being added to the report list. Due to this appleir_input_configured() is never called, hidinput_connect() fails which results in the HID_CLAIMED_INPUT flag is not being set. However, it does not make appleir_probe() fail and lets the event callback to be called without the associated input device. Thus, add a check for the HID_CLAIMED_INPUT flag and leave the event hook early if the driver didn't claim any input_dev for some reason. Moreover, some other hid drivers accessing input_dev in their event callbacks do have similar checks, too. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
Fixes: 9a4a5574ce42 ("HID: appleir: add support for Apple ir devices") Cc: stable@vger.kernel.org Signed-off-by: Daniil Dulov Signed-off-by: Jiri Kosina --- drivers/hid/hid-appleir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index 8deded1857254..c45e5aa569d25 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -188,7 +188,7 @@ static int appleir_raw_event(struct hid_device *hid, struct hid_report *report, static const u8 flatbattery[] = { 0x25, 0x87, 0xe0 }; unsigned long flags; - if (len != 5) + if (len != 5 || !(hid->claimed & HID_CLAIMED_INPUT)) goto out; if (!memcmp(data, keydown, sizeof(keydown))) { From a6a4f4e9b8018806cca30049b59a1c3c8b513701 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Feb 2025 22:33:57 +0000 Subject: [PATCH 208/503] HID: debug: Fix spelling mistake "Messanger" -> "Messenger" There is a spelling mistake in a literal string. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 541d682af15aa..8433306148d57 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3450,7 +3450,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_MACRO_RECORD_START] = "MacroRecordStart", [KEY_MACRO_RECORD_STOP] = "MacroRecordStop", [KEY_MARK_WAYPOINT] = "MarkWayPoint", [KEY_MEDIA_REPEAT] = "MediaRepeat", - [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messanger", + [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messenger", [KEY_NAV_CHART] = "NavChar", [KEY_NAV_INFO] = "NavInfo", [KEY_NEWS] = "News", [KEY_NEXT_ELEMENT] = "NextElement", [KEY_NEXT_FAVORITE] = "NextFavorite", [KEY_NOTIFICATION_CENTER] = "NotificationCenter", From e53fc232a65f7488ab75d03a5b95f06aaada7262 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Thu, 27 Feb 2025 15:41:33 -0800 Subject: 
[PATCH 209/503] HID: hid-steam: Fix use-after-free when detaching device When a hid-steam device is removed it must clean up the client_hdev used for intercepting hidraw access. This can lead to scheduling deferred work to reattach the input device. Though the cleanup cancels the deferred work, this was done before the client_hdev itself is cleaned up, so it gets rescheduled. This patch fixes the ordering to make sure the deferred work is properly canceled. Reported-by: syzbot+0154da2d403396b2bd59@syzkaller.appspotmail.com Fixes: 79504249d7e2 ("HID: hid-steam: Move hidraw input (un)registering to work") Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index c9e65e9088b31..10460b7bde1a2 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1327,11 +1327,11 @@ static void steam_remove(struct hid_device *hdev) return; } + hid_destroy_device(steam->client_hdev); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); cancel_work_sync(&steam->unregister_work); - hid_destroy_device(steam->client_hdev); steam->client_hdev = NULL; steam->client_opened = 0; if (steam->quirks & STEAM_QUIRK_WIRELESS) { From 0132c406705a466b95854ce1058f3d8354f90a42 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Feb 2025 08:41:39 +0000 Subject: [PATCH 210/503] HID: intel-thc-hid: Fix spelling mistake "intput" -> "input" There is a spelling mistake in a dev_err_once message. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index 7373238ceb18b..918050af73e55 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -107,7 +107,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev) return 0; } - dev_err_once(qsdev->dev, "Unexpected intput report type: %d\n", input_rep_type); + dev_err_once(qsdev->dev, "Unexpected input report type: %d\n", input_rep_type); return -EINVAL; } From db52926fb0be40e1d588a346df73f5ea3a34a4c6 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 4 Mar 2025 11:22:55 +0800 Subject: [PATCH 211/503] HID: Intel-thc-hid: Intel-quickspi: Correct device state after S4 During S4 restore flow, quickspi device was reset by driver and state was changed to RESETTED. It needs to be changed to ENABLED state after S4 re-initialization finished, otherwise, device will run in wrong state and HID input data will be dropped. 
Signed-off-by: Even Xu Fixes: 6912aaf3fd24 ("HID: intel-thc-hid: intel-quickspi: Add PM implementation") Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 4641e818dfa44..6b2c7620be2b1 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -909,6 +909,8 @@ static int quickspi_restore(struct device *device) thc_change_ltr_mode(qsdev->thc_hw, THC_LTR_MODE_ACTIVE); + qsdev->state = QUICKSPI_ENABLED; + return 0; } From 5eb3dc1396aa7e315486b24df80df782912334b7 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:40 +0100 Subject: [PATCH 212/503] net: ipa: Fix v4.7 resource group names In the downstream IPA driver there's only one group defined for source and destination, and the destination group doesn't have a _DPL suffix. 
Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-1-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index c8c23d9be961b..7e315779e6648 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -28,12 +28,10 @@ enum ipa_resource_type { enum ipa_rsrc_group_id { /* Source resource group identifiers */ IPA_RSRC_GROUP_SRC_UL_DL = 0, - IPA_RSRC_GROUP_SRC_UC_RX_Q, IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ /* Destination resource group identifiers */ - IPA_RSRC_GROUP_DST_UL_DL_DPL = 0, - IPA_RSRC_GROUP_DST_UNUSED_1, + IPA_RSRC_GROUP_DST_UL_DL = 0, IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ }; @@ -81,7 +79,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, .aggregation = true, .status_enable = true, .rx = { @@ -128,7 +126,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, .qmap = true, .aggregation = true, .rx = { @@ -197,12 +195,12 @@ static const struct ipa_resource ipa_resource_src[] = { /* Destination resource configuration data for an SoC having IPA v4.7 */ static const struct ipa_resource ipa_resource_dst[] = { [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 7, .max = 7, }, }, [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 
2, .max = 2, }, }, From 6a2843aaf551d87beb92d774f7d5b8ae007fe774 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:41 +0100 Subject: [PATCH 213/503] net: ipa: Fix QSB data for v4.7 As per downstream reference, max_writes should be 12 and max_reads should be 13. Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-2-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 7e315779e6648..e63dcf8d45567 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -38,8 +38,8 @@ enum ipa_rsrc_group_id { /* QSB configuration data for an SoC having IPA v4.7 */ static const struct ipa_qsb_data ipa_qsb_data[] = { [IPA_QSB_MASTER_DDR] = { - .max_writes = 8, - .max_reads = 0, /* no limit (hardware max) */ + .max_writes = 12, + .max_reads = 13, .max_reads_beats = 120, }, }; From 934e69669e32eb653234898424ae007bae2f636e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:42 +0100 Subject: [PATCH 214/503] net: ipa: Enable checksum for IPA_ENDPOINT_AP_MODEM_{RX,TX} for v4.7 Enable the checksum option for these two endpoints in order to allow mobile data to actually work. Without this, no packets seem to make it through the IPA. 
Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-3-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index e63dcf8d45567..41f212209993f 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -104,6 +104,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -127,6 +128,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { From 4c2d14c40a68678d885eab4008a0129646805bae Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 28 Feb 2025 22:14:08 +0800 Subject: [PATCH 215/503] ppp: Fix KMSAN uninit-value warning with bpf Syzbot caught a "KMSAN: uninit-value" warning [1], which is caused by the ppp driver not initializing a 2-byte header when using socket filter. The following code can generate a PPP filter BPF program: ''' struct bpf_program fp; pcap_t *handle; handle = pcap_open_dead(DLT_PPP_PPPD, 65535); pcap_compile(handle, &fp, "ip and outbound", 0, 0); bpf_dump(&fp, 1); ''' Its output is: ''' (000) ldh [2] (001) jeq #0x21 jt 2 jf 5 (002) ldb [0] (003) jeq #0x1 jt 4 jf 5 (004) ret #65535 (005) ret #0 ''' We can find similar code at the following link: https://github.com/ppp-project/ppp/blob/master/pppd/options.c#L1680 The maintainer of this code repository is also the original maintainer of the ppp driver. 
As you can see the BPF program skips 2 bytes of data and then reads the 'Protocol' field to determine if it's an IP packet. Then it reads the first byte of the first 2 bytes to determine the direction. The issue is that only the first byte indicating direction is initialized in current ppp driver code while the second byte is not initialized. For normal BPF programs generated by libpcap, uninitialized data won't be used, so it's not a problem. However, for carefully crafted BPF programs, such as those generated by syzkaller [2], which start reading from offset 0, the uninitialized data will be used and caught by KMSAN. [1] https://syzkaller.appspot.com/bug?extid=853242d9c9917165d791 [2] https://syzkaller.appspot.com/text?tag=ReproC&x=11994913980000 Cc: Paul Mackerras Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+853242d9c9917165d791@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/000000000000dea025060d6bc3bc@google.com/ Signed-off-by: Jiayuan Chen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250228141408.393864-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4583e15ad03a0..1420c4efa48e6 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -72,6 +72,17 @@ #define PPP_PROTO_LEN 2 #define PPP_LCP_HDRLEN 4 +/* The filter instructions generated by libpcap are constructed + * assuming a four-byte PPP header on each packet, where the last + * 2 bytes are the protocol field defined in the RFC and the first + * byte of the first 2 bytes indicates the direction. + * The second byte is currently unused, but we still need to initialize + * it to prevent crafted BPF programs from reading them which would + * cause reading of uninitialized data. 
+ */ +#define PPP_FILTER_OUTBOUND_TAG 0x0100 +#define PPP_FILTER_INBOUND_TAG 0x0000 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -1762,10 +1773,10 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) if (proto < 0x8000) { #ifdef CONFIG_PPP_FILTER - /* check if we should pass this packet */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ - *(u8 *)skb_push(skb, 2) = 1; + /* check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_OUTBOUND_TAG above. + */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_OUTBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) @@ -2482,14 +2493,13 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* network protocol frame - give it to the kernel */ #ifdef CONFIG_PPP_FILTER - /* check if the packet passes the pass and active filters */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { if (skb_unclone(skb, GFP_ATOMIC)) goto err; - - *(u8 *)skb_push(skb, 2) = 0; + /* Check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_INBOUND_TAG above. + */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_INBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) From 637399bf7e77797811adf340090b561a8f9d1213 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 1 Mar 2025 15:11:13 +0100 Subject: [PATCH 216/503] net: ethtool: netlink: Allow NULL nlattrs when getting a phy_device ethnl_req_get_phydev() is used to lookup a phy_device, in the case an ethtool netlink command targets a specific phydev within a netdev's topology. 
It takes as a parameter a const struct nlattr *header that's used for error handling : if (!phydev) { NL_SET_ERR_MSG_ATTR(extack, header, "no phy matching phyindex"); return ERR_PTR(-ENODEV); } In the notify path after a ->set operation however, there's no request attributes available. The typical callsite for the above function looks like: phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_XXX_HEADER], info->extack); So, when tb is NULL (such as in the ethnl notify path), we have a nice crash. It turns out that there's only the PLCA command that is in that case, as the other phydev-specific commands don't have a notification. This commit fixes the crash by passing the cmd index and the nlattr array separately, allowing NULL-checking it directly inside the helper. Fixes: c15e065b46dc ("net: ethtool: Allow passing a phy index for some commands") Signed-off-by: Maxime Chevallier Reviewed-by: Kory Maincent Reported-by: Parthiban Veerasooran Link: https://patch.msgid.link/20250301141114.97204-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/cabletest.c | 8 ++++---- net/ethtool/linkstate.c | 2 +- net/ethtool/netlink.c | 6 +++--- net/ethtool/netlink.h | 5 +++-- net/ethtool/phy.c | 2 +- net/ethtool/plca.c | 6 +++--- net/ethtool/pse-pd.c | 4 ++-- net/ethtool/stats.c | 2 +- net/ethtool/strset.c | 2 +- 9 files changed, 19 insertions(+), 18 deletions(-) diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index f22051f33868a..84096f6b0236e 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -72,8 +72,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; @@ -339,8 +339,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) 
goto out_dev_put; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index af19e1bed303f..05a5f72c99fab 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -103,7 +103,7 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_LINKSTATE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b4c45207fa32e..734849a573691 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -211,7 +211,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack) { struct phy_device *phydev; @@ -225,8 +225,8 @@ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, return req_info->dev->phydev; phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index); - if (!phydev) { - NL_SET_ERR_MSG_ATTR(extack, header, + if (!phydev && tb) { + NL_SET_ERR_MSG_ATTR(extack, tb[header], "no phy matching phyindex"); return ERR_PTR(-ENODEV); } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ff69ca0715dea..ec6ab5443a6f2 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -275,7 +275,8 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * ethnl_req_get_phydev() - Gets the phy_device targeted by this request, * if any. Must be called under rntl_lock(). 
* @req_info: The ethnl request to get the phy from. - * @header: The netlink header, used for error reporting. + * @tb: The netlink attributes array, for error reporting. + * @header: The netlink header index, used for error reporting. * @extack: The netlink extended ACK, for error reporting. * * The caller must hold RTNL, until it's done interacting with the returned @@ -289,7 +290,7 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * is returned. */ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack); /** diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index ed8f690f6bac8..e067cc234419d 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -125,7 +125,7 @@ static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, struct phy_req_info *req_info = PHY_REQINFO(req_base); struct phy_device *phydev; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER, extack); if (!phydev) return 0; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index d95d92f173a6d..e1f7820a6158f 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -62,7 +62,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { @@ -152,7 +152,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) bool mod = false; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected 
if (IS_ERR_OR_NULL(phydev)) @@ -211,7 +211,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2819e2ba6be2d..4f6b99eab2a6c 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) return -ENODEV; @@ -261,7 +261,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PSE_HEADER, info->extack); ret = ethnl_set_pse_validate(phydev, info); if (ret) diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 038a2558f0520..3ca8eb2a3b314 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -138,7 +138,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_STATS_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STATS_HEADER, info->extack); if (IS_ERR(phydev)) return PTR_ERR(phydev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 6b76c05caba4d..f6a67109beda1 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -309,7 +309,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS], + phydev = 
ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, info->extack); /* phydev can be NULL, check for errors only */ From 022bfe24aad8937705704ff2e414b100cf0f2e1a Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 3 Mar 2025 18:10:13 +0100 Subject: [PATCH 217/503] mptcp: fix 'scheduling while atomic' in mptcp_pm_nl_append_new_local_addr If multiple connection requests attempt to create an implicit mptcp endpoint in parallel, more than one caller may end up in mptcp_pm_nl_append_new_local_addr because none found the address in local_addr_list during their call to mptcp_pm_nl_get_local_id. In this case, the concurrent new_local_addr calls may delete the address entry created by the previous caller. These deletes use synchronize_rcu, but this is not permitted in some of the contexts where this function may be called. During packet recv, the caller may be in a rcu read critical section and have preemption disabled. An example stack: BUG: scheduling while atomic: swapper/2/0/0x00000302 Call Trace: dump_stack_lvl (lib/dump_stack.c:117 (discriminator 1)) dump_stack (lib/dump_stack.c:124) __schedule_bug (kernel/sched/core.c:5943) schedule_debug.constprop.0 (arch/x86/include/asm/preempt.h:33 kernel/sched/core.c:5970) __schedule (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:207 kernel/sched/features.h:29 kernel/sched/core.c:6621) schedule (arch/x86/include/asm/preempt.h:84 kernel/sched/core.c:6804 kernel/sched/core.c:6818) schedule_timeout (kernel/time/timer.c:2160) wait_for_completion (kernel/sched/completion.c:96 kernel/sched/completion.c:116 kernel/sched/completion.c:127 kernel/sched/completion.c:148) __wait_rcu_gp (include/linux/rcupdate.h:311 kernel/rcu/update.c:444) synchronize_rcu (kernel/rcu/tree.c:3609) mptcp_pm_nl_append_new_local_addr (net/mptcp/pm_netlink.c:966 net/mptcp/pm_netlink.c:1061) mptcp_pm_nl_get_local_id (net/mptcp/pm_netlink.c:1164) mptcp_pm_get_local_id (net/mptcp/pm.c:420) subflow_check_req (net/mptcp/subflow.c:98 
net/mptcp/subflow.c:213) subflow_v4_route_req (net/mptcp/subflow.c:305) tcp_conn_request (net/ipv4/tcp_input.c:7216) subflow_v4_conn_request (net/mptcp/subflow.c:651) tcp_rcv_state_process (net/ipv4/tcp_input.c:6709) tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1934) tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2334) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) ip_local_deliver_finish (include/linux/rcupdate.h:813 net/ipv4/ip_input.c:234) ip_local_deliver (include/linux/netfilter.h:314 include/linux/netfilter.h:308 net/ipv4/ip_input.c:254) ip_sublist_rcv_finish (include/net/dst.h:461 net/ipv4/ip_input.c:580) ip_sublist_rcv (net/ipv4/ip_input.c:640) ip_list_rcv (net/ipv4/ip_input.c:675) __netif_receive_skb_list_core (net/core/dev.c:5583 net/core/dev.c:5631) netif_receive_skb_list_internal (net/core/dev.c:5685 net/core/dev.c:5774) napi_complete_done (include/linux/list.h:37 include/net/gro.h:449 include/net/gro.h:444 net/core/dev.c:6114) igb_poll (drivers/net/ethernet/intel/igb/igb_main.c:8244) igb __napi_poll (net/core/dev.c:6582) net_rx_action (net/core/dev.c:6653 net/core/dev.c:6787) handle_softirqs (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:588 kernel/softirq.c:427 kernel/softirq.c:636) irq_exit_rcu (kernel/softirq.c:651) common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) This problem seems particularly prevalent if the user advertises an endpoint that has a different external vs internal address. In the case where the external address is advertised and multiple connections already exist, multiple subflow SYNs arrive in parallel which tends to trigger the race during creation of the first local_addr_list entries which have the internal address instead. Fix by skipping the replacement of an existing implicit local address if called via mptcp_pm_nl_get_local_id. 
Fixes: d045b9eb95a9 ("mptcp: introduce implicit endpoints") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Krister Johansen Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250303-net-mptcp-fix-sched-while-atomic-v1-1-f6a216c5a74c@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c0e47f4f7b1aa..7868207c4e9d9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -977,7 +977,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, - bool needs_id) + bool needs_id, bool replace) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -1017,6 +1017,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, if (entry->addr.id) goto out; + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path. 
+ */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + pernet->addrs--; entry->addr.id = cur->addr.id; list_del_rcu(&cur->list); @@ -1165,7 +1176,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) kfree(entry); @@ -1433,7 +1444,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info)); + !mptcp_pm_has_addr_attr_id(attr, info), + true); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; From b33a534610067ade2bdaf2052900aaad99701353 Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Mon, 3 Mar 2025 16:56:19 +0100 Subject: [PATCH 218/503] vlan: enforce underlying device type Currently, VLAN devices can be created on top of non-ethernet devices. Besides the fact that it doesn't make much sense, this also causes a bug which leaks the address of a kernel function to usermode. When creating a VLAN device, we initialize GARP (garp_init_applicant) and MRP (mrp_init_applicant) for the underlying device. As part of the initialization process, we add the multicast address of each applicant to the underlying device, by calling dev_mc_add. __dev_mc_add uses dev->addr_len to determine the length of the new multicast address. This causes an out-of-bounds read if dev->addr_len is greater than 6, since the multicast addresses provided by GARP and MRP are only 6 bytes long. 
This behaviour can be reproduced using the following commands: ip tunnel add gretest mode ip6gre local ::1 remote ::2 dev lo ip l set up dev gretest ip link add link gretest name vlantest type vlan id 100 Then, the following command will display the address of garp_pdu_rcv: ip maddr show | grep 01:80:c2:00:00:21 Fix the bug by enforcing the type of the underlying device during VLAN device initialization. Fixes: 22bedad3ce11 ("net: convert multicast list to list_head") Reported-by: syzbot+91161fe81857b396c8a0@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000ca9a81061a01ec20@google.com/ Signed-off-by: Oscar Maes Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250303155619.8918-1-oscmaes92@gmail.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e45187b882206..41be38264493d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -131,7 +131,8 @@ int vlan_check_real_dev(struct net_device *real_dev, { const char *name = real_dev->name; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + if (real_dev->features & NETIF_F_VLAN_CHALLENGED || + real_dev->type != ARPHRD_ETHER) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; From 0d3e0dfd68fb9e6b0ec865be9f3377cc3ff55733 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 5 Mar 2025 07:00:05 +0200 Subject: [PATCH 219/503] x86/sgx: Fix size overflows in sgx_encl_create() The total size calculated for EPC can overflow u64 given the added up page for SECS. Further, the total size calculated for shmem can overflow even when the EPC size stays within limits of u64, given that it adds the extra space for 128 byte PCMD structures (one for each page). Address this by pre-evaluating the micro-architectural requirement of SGX: the address space size must be power of two. 
This is eventually checked up by ECREATE but the pre-check has the additional benefit of making sure that there is some space for additional data. Fixes: 888d24911787 ("x86/sgx: Add SGX_IOC_ENCLAVE_CREATE") Reported-by: Dan Carpenter Signed-off-by: Jarkko Sakkinen Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: Peter Zijlstra Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20250305050006.43896-1-jarkko@kernel.org Closes: https://lore.kernel.org/linux-sgx/c87e01a0-e7dd-4749-a348-0980d3444f04@stanley.mountain/ --- arch/x86/kernel/cpu/sgx/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index b65ab214bdf57..776a20172867e 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -64,6 +64,13 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) struct file *backing; long ret; + /* + * ECREATE would detect this too, but checking here also ensures + * that the 'encl_size' calculations below can never overflow. + */ + if (!is_power_of_2(secs->size)) + return -EINVAL; + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) return PTR_ERR(va_page); From ca0dedaff92307591f66c9206933fbdfe87add10 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 5 Mar 2025 13:54:34 +0800 Subject: [PATCH 220/503] ALSA: hda/realtek: update ALC222 depop optimize Add ALC222 its own depop functions for alc_init and alc_shutup. 
[note: this fixes pop noise issues on the models with two headphone jacks -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 697a38e41e166..4ca457e7ca9dd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3843,6 +3843,79 @@ static void alc225_shutup(struct hda_codec *codec) } } +static void alc222_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + return; + + msleep(30); + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + msleep(75); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + + msleep(75); + } +} + +static void alc222_shutup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + hp_pin = 0x21; + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(75); + + if (hp1_pin_sense) + 
snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + msleep(75); + } + alc_auto_setup_eapd(codec, false); + alc_shutup_pins(codec); +} + static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -11907,8 +11980,11 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0222: case 0x10ec0623: spec->codec_variant = ALC269_TYPE_ALC623; + spec->shutup = alc222_shutup; + spec->init_hook = alc222_init; break; case 0x10ec0700: case 0x10ec0701: From 1ee5aa765c22a0577ec552d460bf2035300b4b51 Mon Sep 17 00:00:00 2001 From: Hoku Ishibe Date: Sun, 23 Feb 2025 21:05:17 -0500 Subject: [PATCH 221/503] ALSA: hda: intel: Add Dell ALC3271 to power_save denylist Dell XPS 13 7390 with the Realtek ALC3271 codec experiences persistent humming noise when the power_save mode is enabled. This issue occurs when the codec enters power saving mode, leading to unwanted noise from the speakers. This patch adds the affected model (PCI ID 0x1028:0x0962) to the power_save denylist to ensure power_save is disabled by default, preventing power-off related noise issues. Steps to Reproduce 1. Boot the system with `snd_hda_intel` loaded. 2. Verify that `power_save` mode is enabled: ```sh cat /sys/module/snd_hda_intel/parameters/power_save ```` output: 10 (default power save timeout) 3. Wait for the power save timeout 4. Observe a persistent humming noise from the speakers 5. Disable `power_save` manually: ```sh echo 0 | sudo tee /sys/module/snd_hda_intel/parameters/power_save ```` 6. Confirm that the noise disappears immediately. This issue has been observed on my system, and this patch successfully eliminates the unwanted noise. If other users experience similar issues, additional reports would be helpful. 
Signed-off-by: Hoku Ishibe Cc: Link: https://patch.msgid.link/20250224020517.51035-1-me@hokuishi.be Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 67540e0373099..e67c22c59f02b 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2232,6 +2232,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), /* KONTRON SinglePC may cause a stall at runtime resume */ SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), + /* Dell ALC3271 */ + SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0), {} }; From 50dc696c3a482ea35bd0691f728d47e40b668483 Mon Sep 17 00:00:00 2001 From: Aiden Ma Date: Tue, 4 Mar 2025 19:54:01 +0800 Subject: [PATCH 222/503] doc: correcting two prefix errors in idmappings.rst Add the 'k' prefix to id 21000. And id `u1000` in the third idmapping should be mapped to `k31000`, not `u31000`. Signed-off-by: Aiden Ma Link: https://lore.kernel.org/r/tencent_4E7B1F143E8051530C21FCADF4E014DCBB06@qq.com Signed-off-by: Christian Brauner --- Documentation/filesystems/idmappings.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index 77930c77fcfe6..2a206129f8284 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -63,8 +63,8 @@ what id ``k11000`` corresponds to in the second or third idmapping. The straightforward algorithm to use is to apply the inverse of the first idmapping, mapping ``k11000`` up to ``u1000``. Afterwards, we can map ``u1000`` down using either the second idmapping mapping or third idmapping mapping. The second -idmapping would map ``u1000`` down to ``21000``. The third idmapping would map -``u1000`` down to ``u31000``. +idmapping would map ``u1000`` down to ``k21000``. 
The third idmapping would map +``u1000`` down to ``k31000``. If we were given the same task for the following three idmappings:: From 9ba93cb8212d62bccd8b41b8adb6656abf37280a Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 5 Mar 2025 10:28:41 +0530 Subject: [PATCH 223/503] platform/x86/amd/pmf: Propagate PMF-TA return codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the amd_pmf_invoke_cmd_init() function within the PMF driver ensure that the actual result from the PMF-TA is returned rather than a generic EIO. This change allows for proper handling of errors originating from the PMF-TA. Reviewed-by: Mario Limonciello Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250305045842.4117767-1-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8c88769ea1d87..b404764550c4c 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -323,7 +323,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) } else { dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); dev->smart_pc_enabled = false; - return -EIO; + return res; } return 0; From 376a8c2a144397d9cf2a67d403dd64f4a7ff9104 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 5 Mar 2025 10:28:42 +0530 Subject: [PATCH 224/503] platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PMF driver allocates a shared memory buffer using tee_shm_alloc_kernel_buf() for communication with the PMF-TA. 
The latest PMF-TA version introduces new structures with OEM debug information and additional policy input conditions for evaluating the policy binary. Consequently, the shared memory size must be increased to ensure compatibility between the PMF driver and the updated PMF-TA. To do so, introduce the new PMF-TA UUID and update the PMF shared memory configuration to ensure compatibility with the latest PMF-TA version. Additionally, export the TA UUID. These updates will result in modifications to the prototypes of amd_pmf_tee_init() and amd_pmf_ta_open_session(). Link: https://lore.kernel.org/all/55ac865f-b1c7-fa81-51c4-d211c7963e7e@linux.intel.com/ Reviewed-by: Mario Limonciello Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250305045842.4117767-2-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/pmf.h | 5 ++- drivers/platform/x86/amd/pmf/tee-if.c | 50 +++++++++++++++++++-------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 41b2b91b8fdc6..e6bdee68ccf34 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -106,9 +106,12 @@ struct cookie_header { #define PMF_TA_IF_VERSION_MAJOR 1 #define TA_PMF_ACTION_MAX 32 #define TA_PMF_UNDO_MAX 8 -#define TA_OUTPUT_RESERVED_MEM 906 +#define TA_OUTPUT_RESERVED_MEM 922 #define MAX_OPERATION_PARAMS 4 +#define TA_ERROR_CRYPTO_INVALID_PARAM 0x20002 +#define TA_ERROR_CRYPTO_BIN_TOO_LARGE 0x2000d + #define PMF_IF_V1 1 #define PMF_IF_V2 2 diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index b404764550c4c..ceaff1ebb7b93 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -27,8 +27,11 @@ module_param(pb_side_load, bool, 0444); MODULE_PARM_DESC(pb_side_load, "Sideload 
policy binaries debug policy failures"); #endif -static const uuid_t amd_pmf_ta_uuid = UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, - 0xb1, 0x2d, 0xc5, 0x29, 0xb1, 0x3d, 0x85, 0x43); +static const uuid_t amd_pmf_ta_uuid[] = { UUID_INIT(0xd9b39bf2, 0x66bd, 0x4154, 0xaf, 0xb8, 0x8a, + 0xcc, 0x2b, 0x2b, 0x60, 0xd6), + UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, 0xb1, 0x2d, 0xc5, + 0x29, 0xb1, 0x3d, 0x85, 0x43), + }; static const char *amd_pmf_uevent_as_str(unsigned int state) { @@ -321,7 +324,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) */ schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3)); } else { - dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); + dev_dbg(dev->dev, "ta invoke cmd init failed err: %x\n", res); dev->smart_pc_enabled = false; return res; } @@ -390,12 +393,12 @@ static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const voi return ver->impl_id == TEE_IMPL_ID_AMDTEE; } -static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id) +static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_t *uuid) { struct tee_ioctl_open_session_arg sess_arg = {}; int rc; - export_uuid(sess_arg.uuid, &amd_pmf_ta_uuid); + export_uuid(sess_arg.uuid, uuid); sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC; sess_arg.num_params = 0; @@ -434,7 +437,7 @@ static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) return 0; } -static int amd_pmf_tee_init(struct amd_pmf_dev *dev) +static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) { u32 size; int ret; @@ -445,7 +448,7 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev) return PTR_ERR(dev->tee_ctx); } - ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id); + ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); if (ret) { dev_err(dev->dev, "Failed to open TA session (%d)\n", ret); ret = -EINVAL; @@ -489,7 +492,8 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) int 
amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) { - int ret; + bool status; + int ret, i; ret = apmf_check_smart_pc(dev); if (ret) { @@ -502,10 +506,6 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) return -ENODEV; } - ret = amd_pmf_tee_init(dev); - if (ret) - return ret; - INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd); ret = amd_pmf_set_dram_addr(dev, true); @@ -534,8 +534,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) goto error; } - ret = amd_pmf_start_policy_engine(dev); - if (ret) + for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { + ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); + if (ret) + return ret; + + ret = amd_pmf_start_policy_engine(dev); + switch (ret) { + case TA_PMF_TYPE_SUCCESS: + status = true; + break; + case TA_ERROR_CRYPTO_INVALID_PARAM: + case TA_ERROR_CRYPTO_BIN_TOO_LARGE: + amd_pmf_tee_deinit(dev); + status = false; + break; + default: + goto error; + } + + if (status) + break; + } + + if (!status && !pb_side_load) goto error; if (pb_side_load) From 12f65d1203507f7db3ba59930fe29a3b8eee9945 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Mon, 24 Feb 2025 23:31:26 +0900 Subject: [PATCH 225/503] gpio: aggregator: protect driver attr handlers against module unload Both new_device_store and delete_device_store touch module global resources (e.g. gpio_aggregator_lock). To prevent race conditions with module unload, a reference needs to be held. Add try_module_get() in these handlers. For new_device_store, this eliminates what appears to be the most dangerous scenario: if an id is allocated from gpio_aggregator_idr but platform_device_register has not yet been called or completed, a concurrent module unload could fail to unregister/delete the device, leaving behind a dangling platform device/GPIO forwarder. This can result in various issues. The following simple reproducer demonstrates these problems: #!/bin/bash while :; do # note: whether 'gpiochip0 0' exists or not does not matter. 
echo 'gpiochip0 0' > /sys/bus/platform/drivers/gpio-aggregator/new_device done & while :; do modprobe gpio-aggregator modprobe -r gpio-aggregator done & wait Starting with the following warning, several kinds of warnings will appear and the system may become unstable: ------------[ cut here ]------------ list_del corruption, ffff888103e2e980->next is LIST_POISON1 (dead000000000100) WARNING: CPU: 1 PID: 1327 at lib/list_debug.c:56 __list_del_entry_valid_or_report+0xa3/0x120 [...] RIP: 0010:__list_del_entry_valid_or_report+0xa3/0x120 [...] Call Trace: ? __list_del_entry_valid_or_report+0xa3/0x120 ? __warn.cold+0x93/0xf2 ? __list_del_entry_valid_or_report+0xa3/0x120 ? report_bug+0xe6/0x170 ? __irq_work_queue_local+0x39/0xe0 ? handle_bug+0x58/0x90 ? exc_invalid_op+0x13/0x60 ? asm_exc_invalid_op+0x16/0x20 ? __list_del_entry_valid_or_report+0xa3/0x120 gpiod_remove_lookup_table+0x22/0x60 new_device_store+0x315/0x350 [gpio_aggregator] kernfs_fop_write_iter+0x137/0x1f0 vfs_write+0x262/0x430 ksys_write+0x60/0xd0 do_syscall_64+0x6c/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] 
---[ end trace 0000000000000000 ]--- Fixes: 828546e24280 ("gpio: Add GPIO Aggregator") Cc: stable@vger.kernel.org Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250224143134.3024598-2-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 65f41cc3eafcc..d668ddb2e81d3 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -119,10 +119,15 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, struct platform_device *pdev; int res, id; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + /* kernfs guarantees string termination, so count + 1 is safe */ aggr = kzalloc(sizeof(*aggr) + count + 1, GFP_KERNEL); - if (!aggr) - return -ENOMEM; + if (!aggr) { + res = -ENOMEM; + goto put_module; + } memcpy(aggr->args, buf, count + 1); @@ -161,6 +166,7 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, } aggr->pdev = pdev; + module_put(THIS_MODULE); return count; remove_table: @@ -175,6 +181,8 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, kfree(aggr->lookups); free_ga: kfree(aggr); +put_module: + module_put(THIS_MODULE); return res; } @@ -203,13 +211,19 @@ static ssize_t delete_device_store(struct device_driver *driver, if (error) return error; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + mutex_lock(&gpio_aggregator_lock); aggr = idr_remove(&gpio_aggregator_idr, id); mutex_unlock(&gpio_aggregator_lock); - if (!aggr) + if (!aggr) { + module_put(THIS_MODULE); return -ENOENT; + } gpio_aggregator_free(aggr); + module_put(THIS_MODULE); return count; } static DRIVER_ATTR_WO(delete_device); From f02c41f87cfe61440c18bf77d1ef0a884b9ee2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 21 Jan 2025 14:58:33 +0100 
Subject: [PATCH 226/503] gpio: rcar: Use raw_spinlock to protect register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use raw_spinlock in order to fix spurious messages about invalid context when spinlock debugging is enabled. The lock is only used to serialize register access. [ 4.239592] ============================= [ 4.239595] [ BUG: Invalid wait context ] [ 4.239599] 6.13.0-rc7-arm64-renesas-05496-gd088502a519f #35 Not tainted [ 4.239603] ----------------------------- [ 4.239606] kworker/u8:5/76 is trying to lock: [ 4.239609] ffff0000091898a0 (&p->lock){....}-{3:3}, at: gpio_rcar_config_interrupt_input_mode+0x34/0x164 [ 4.239641] other info that might help us debug this: [ 4.239643] context-{5:5} [ 4.239646] 5 locks held by kworker/u8:5/76: [ 4.239651] #0: ffff0000080fb148 ((wq_completion)async){+.+.}-{0:0}, at: process_one_work+0x190/0x62c [ 4.250180] OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'frame-master' with a value. [ 4.254094] #1: ffff80008299bd80 ((work_completion)(&entry->work)){+.+.}-{0:0}, at: process_one_work+0x1b8/0x62c [ 4.254109] #2: ffff00000920c8f8 [ 4.258345] OF: /soc/sound@ec500000/ports/port@1/endpoint: Read of boolean property 'bitclock-master' with a value. [ 4.264803] (&dev->mutex){....}-{4:4}, at: __device_attach_async_helper+0x3c/0xdc [ 4.264820] #3: ffff00000a50ca40 (request_class#2){+.+.}-{4:4}, at: __setup_irq+0xa0/0x690 [ 4.264840] #4: [ 4.268872] OF: /soc/sound@ec500000/ports/port@1/endpoint: Read of boolean property 'frame-master' with a value. 
[ 4.273275] ffff00000a50c8c8 (lock_class){....}-{2:2}, at: __setup_irq+0xc4/0x690 [ 4.296130] renesas_sdhi_internal_dmac ee100000.mmc: mmc1 base at 0x00000000ee100000, max clock rate 200 MHz [ 4.304082] stack backtrace: [ 4.304086] CPU: 1 UID: 0 PID: 76 Comm: kworker/u8:5 Not tainted 6.13.0-rc7-arm64-renesas-05496-gd088502a519f #35 [ 4.304092] Hardware name: Renesas Salvator-X 2nd version board based on r8a77965 (DT) [ 4.304097] Workqueue: async async_run_entry_fn [ 4.304106] Call trace: [ 4.304110] show_stack+0x14/0x20 (C) [ 4.304122] dump_stack_lvl+0x6c/0x90 [ 4.304131] dump_stack+0x14/0x1c [ 4.304138] __lock_acquire+0xdfc/0x1584 [ 4.426274] lock_acquire+0x1c4/0x33c [ 4.429942] _raw_spin_lock_irqsave+0x5c/0x80 [ 4.434307] gpio_rcar_config_interrupt_input_mode+0x34/0x164 [ 4.440061] gpio_rcar_irq_set_type+0xd4/0xd8 [ 4.444422] __irq_set_trigger+0x5c/0x178 [ 4.448435] __setup_irq+0x2e4/0x690 [ 4.452012] request_threaded_irq+0xc4/0x190 [ 4.456285] devm_request_threaded_irq+0x7c/0xf4 [ 4.459398] ata1: link resume succeeded after 1 retries [ 4.460902] mmc_gpiod_request_cd_irq+0x68/0xe0 [ 4.470660] mmc_start_host+0x50/0xac [ 4.474327] mmc_add_host+0x80/0xe4 [ 4.477817] tmio_mmc_host_probe+0x2b0/0x440 [ 4.482094] renesas_sdhi_probe+0x488/0x6f4 [ 4.486281] renesas_sdhi_internal_dmac_probe+0x60/0x78 [ 4.491509] platform_probe+0x64/0xd8 [ 4.495178] really_probe+0xb8/0x2a8 [ 4.498756] __driver_probe_device+0x74/0x118 [ 4.503116] driver_probe_device+0x3c/0x154 [ 4.507303] __device_attach_driver+0xd4/0x160 [ 4.511750] bus_for_each_drv+0x84/0xe0 [ 4.515588] __device_attach_async_helper+0xb0/0xdc [ 4.520470] async_run_entry_fn+0x30/0xd8 [ 4.524481] process_one_work+0x210/0x62c [ 4.528494] worker_thread+0x1ac/0x340 [ 4.532245] kthread+0x10c/0x110 [ 4.535476] ret_from_fork+0x10/0x20 Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: 
https://lore.kernel.org/r/20250121135833.3769310-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rcar.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 2ecee3269a0cc..8e0544e924886 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -40,7 +40,7 @@ struct gpio_rcar_info { struct gpio_rcar_priv { void __iomem *base; - spinlock_t lock; + raw_spinlock_t lock; struct device *dev; struct gpio_chip gpio_chip; unsigned int irq_parent; @@ -123,7 +123,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, * "Setting Level-Sensitive Interrupt Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive or negative logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, hwirq, !active_high_rising_edge); @@ -142,7 +142,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, if (!level_trigger) gpio_rcar_write(p, INTCLR, BIT(hwirq)); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type) @@ -246,7 +246,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, * "Setting General Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, gpio, false); @@ -261,7 +261,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, if (p->info.has_outdtsel && output) gpio_rcar_modify_bit(p, OUTDTSEL, gpio, false); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) @@ -347,7 +347,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, 
unsigned long *mask, return 0; } - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); outputs = gpio_rcar_read(p, INOUTSEL); m = outputs & bankmask; if (m) @@ -356,7 +356,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask, m = ~outputs & bankmask; if (m) val |= gpio_rcar_read(p, INDT) & m; - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); bits[0] = val; return 0; @@ -367,9 +367,9 @@ static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value) struct gpio_rcar_priv *p = gpiochip_get_data(chip); unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); gpio_rcar_modify_bit(p, OUTDT, offset, value); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, @@ -386,12 +386,12 @@ static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, if (!bankmask) return; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); val = gpio_rcar_read(p, OUTDT); val &= ~bankmask; val |= (bankmask & bits[0]); gpio_rcar_write(p, OUTDT, val); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, @@ -505,7 +505,7 @@ static int gpio_rcar_probe(struct platform_device *pdev) return -ENOMEM; p->dev = dev; - spin_lock_init(&p->lock); + raw_spin_lock_init(&p->lock); /* Get device configuration from DT node */ ret = gpio_rcar_parse_dt(p, &npins); From 6697f819a10b238ccf01998c3f203d65d8374696 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 22 Nov 2024 10:50:55 +0800 Subject: [PATCH 227/503] exfat: fix just enough dentries but allocate a new cluster to dir This commit fixes the condition for allocating cluster to parent directory to avoid allocating new cluster to 
parent directory when there are just enough empty directory entries at the end of the parent directory. Fixes: af02c72d0b62 ("exfat: convert exfat_find_empty_entry() to use dentry cache") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 691dd77b6ab5f..5b16181a4c2e9 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -232,7 +232,7 @@ static int exfat_search_empty_slot(struct super_block *sb, dentry = 0; } - while (dentry + num_entries < total_entries && + while (dentry + num_entries <= total_entries && clu.dir != EXFAT_EOF_CLUSTER) { i = dentry & (dentries_per_clu - 1); From 9da33619e0ca53627641bc97d1b93ec741299111 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 31 Jan 2025 12:55:55 +0900 Subject: [PATCH 228/503] exfat: fix soft lockup in exfat_clear_bitmap The bitmap clear loop will take a long time in __exfat_free_cluster() if the data size of a file/dir entry is invalid. If the cluster bit in the bitmap is already clear, stop clearing the bitmap and break out of the loop.
Fixes: 31023864e67a ("exfat: add fat entry operations") Reported-by: Kun Hu , Jiaji Qin Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/balloc.c | 10 ++++++++-- fs/exfat/exfat_fs.h | 2 +- fs/exfat/fatent.c | 11 +++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index ce9be95c9172f..9ff825f1502d5 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -141,7 +141,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) return 0; } -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) +int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) { int i, b; unsigned int ent_idx; @@ -150,13 +150,17 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) struct exfat_mount_options *opts = &sbi->options; if (!is_valid_cluster(sbi, clu)) - return; + return -EIO; ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + if (!test_bit_le(b, sbi->vol_amap[i]->b_data)) + return -EIO; + clear_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sbi->vol_amap[i], sync); if (opts->discard) { @@ -171,6 +175,8 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) opts->discard = 0; } } + + return 0; } /* diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 78be6964a8a08..d30ce18a88b7a 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -456,7 +456,7 @@ int exfat_count_num_clusters(struct super_block *sb, int exfat_load_bitmap(struct super_block *sb); void exfat_free_bitmap(struct exfat_sb_info *sbi); int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); +int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); int 
exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 9e5492ac409b0..6f3651c6ca91e 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -175,6 +175,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu)); if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + int err; unsigned int last_cluster = p_chain->dir + p_chain->size - 1; do { bool sync = false; @@ -189,7 +190,9 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (err) + break; clu++; num_clusters++; } while (num_clusters < p_chain->size); @@ -210,12 +213,13 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)))) + break; clu = n_clu; num_clusters++; if (err) - goto dec_used_clus; + break; if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) { /* @@ -229,7 +233,6 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain } while (clu != EXFAT_EOF_CLUSTER); } -dec_used_clus: sbi->used_clusters -= num_clusters; return 0; } From fda94a9919fd632033979ad7765a99ae3cab9289 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 11 Feb 2025 14:14:21 -0600 Subject: [PATCH 229/503] exfat: short-circuit zero-byte writes in exfat_file_write_iter When generic_write_checks() returns zero, it means that iov_iter_count() is zero, and there is no work to do. 
Simply return success like all other filesystems do, rather than proceeding down the write path, which today yields an -EFAULT in generic_perform_write() via the (fault_in_iov_iter_readable(i, bytes) == bytes) check when bytes == 0. Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Reported-by: Noah Signed-off-by: Eric Sandeen Reviewed-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 05b51e7217838..807349d8ea050 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -587,7 +587,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); - if (ret < 0) + if (ret <= 0) goto unlock; if (iocb->ki_flags & IOCB_DIRECT) { From 13940cef95491472760ca261b6713692ece9b946 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Sat, 8 Feb 2025 17:16:58 +0800 Subject: [PATCH 230/503] exfat: add a check for invalid data size Add a check for invalid data size to avoid corrupted filesystem from being further corrupted. 
Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 5b16181a4c2e9..8b30027d82512 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -646,6 +646,11 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { + exfat_fs_error(sb, "data size is invalid(%lld)", info->size); + return -EIO; + } + info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); if (!is_valid_cluster(sbi, info->start_clu) && info->size) { exfat_warn(sb, "start_clu is invalid cluster(0x%x)", From 3c9231ea6497dfc50ac0ef69fff484da27d0df66 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 4 Mar 2025 08:44:29 +0800 Subject: [PATCH 231/503] net-timestamp: support TCP GSO case for a few missing flags When I read through the TSO codes, I found out that we probably miss initializing the tx_flags of last seg when TSO is turned off, which means at the following points no more timestamp (for this last one) will be generated. There are three flags to be handled in this patch: 1. SKBTX_HW_TSTAMP 2. SKBTX_BPF 3. SKBTX_SCHED_TSTAMP Note that SKBTX_BPF[1] was added in 6.14.0-rc2 by commit 6b98ec7e882af ("bpf: Add BPF_SOCK_OPS_TSTAMP_SCHED_CB callback") and only belongs to net-next branch material for now. The common issue of the above three flags can be fixed by this single patch. This patch initializes the tx_flags to SKBTX_ANY_TSTAMP like what the UDP GSO does to make the newly segmented last skb inherit the tx_flags so that requested timestamp will be generated in each certain layer, or else that last one has zero value of tx_flags which leads to no timestamp at all. 
Fixes: 4ed2d765dfacc ("net-timestamp: TCP timestamping") Signed-off-by: Jason Xing Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2308665b51c53..2dfac79dc78b8 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -13,12 +13,15 @@ #include #include -static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, +static void tcp_gso_tstamp(struct sk_buff *skb, struct sk_buff *gso_skb, unsigned int seq, unsigned int mss) { + u32 flags = skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP; + u32 ts_seq = skb_shinfo(gso_skb)->tskey; + while (skb) { if (before(ts_seq, seq + mss)) { - skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tx_flags |= flags; skb_shinfo(skb)->tskey = ts_seq; return; } @@ -193,8 +196,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); - if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) - tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP)) + tcp_gso_tstamp(segs, gso_skb, seq, mss); newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); From 927e6bec5cf3624665b0a2e9f64a1d32f3d22cdd Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 5 Mar 2025 21:41:13 +0800 Subject: [PATCH 232/503] ASoC: rt1320: set wake_capable = 0 explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "generic_new_peripheral_assigned: invalid dev_num 1, wake supported 1" is reported by our internal CI test. Rt1320's wake feature is not used in Linux and that's why it is not in the wake_capable_list[] list in intel_auxdevice.c. However, BIOS may set it as wake-capable. Overwrite wake_capable to 0 in the codec driver to align with wake_capable_list[]. 
Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Acked-by: Shuming Fan Link: https://patch.msgid.link/20250305134113.201326-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index 3510c3819074b..d83b236a04503 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -535,6 +535,9 @@ static int rt1320_read_prop(struct sdw_slave *slave) /* set the timeout values */ prop->clk_stop_timeout = 64; + /* BIOS may set wake_capable. Make sure it is 0 as wake events are disabled. */ + prop->wake_capable = 0; + return 0; } From 5ac60242b0173be83709603ebaf27a473f16c4e4 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 4 Mar 2025 14:34:26 -0700 Subject: [PATCH 233/503] ublk: set_params: properly check if parameters can be applied The parameters set by the set_params call are only applied to the block device in the start_dev call. So if a device has already been started, a subsequently issued set_params on that device will not have the desired effect, and should return an error. There is an existing check for this - set_params fails on devices in the LIVE state. But this check is not sufficient to cover the recovery case. In this case, the device will be in the QUIESCED or FAIL_IO states, so set_params will succeed. But this success is misleading, because the parameters will not be applied, since the device has already been started (by a previous ublk server). The bit UB_STATE_USED is set on completion of the start_dev; use it to detect and fail set_params commands which arrive too late to be applied (after start_dev). 
Signed-off-by: Uday Shankar Fixes: 0aa73170eba5 ("ublk_drv: add SET_PARAMS/GET_PARAMS control command") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250304-set_params-v1-1-17b5e0887606@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 529085181f355..ca9a67b5b537a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2715,9 +2715,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); - if (ub->dev_info.state == UBLK_S_DEV_LIVE) { + if (test_bit(UB_STATE_USED, &ub->state)) { + /* + * Parameters can only be changed when device hasn't + * been started yet + */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { ret = -EFAULT; From e06472bab2a5393430cc2fbc3211cd3602422c1e Mon Sep 17 00:00:00 2001 From: Olivier Gayot Date: Wed, 5 Mar 2025 10:21:54 +0800 Subject: [PATCH 234/503] block: fix conversion of GPT partition name to 7-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utf16_le_to_7bit function claims to, naively, convert a UTF-16 string to a 7-bit ASCII string. By naively, we mean that it: * drops the first byte of every character in the original UTF-16 string * checks if all characters are printable, and otherwise replaces them by exclamation mark "!". This means that theoretically, all characters outside the 7-bit ASCII range should be replaced by another character. Examples: * lower-case alpha (ɒ) 0x0252 becomes 0x52 (R) * ligature OE (œ) 0x0153 becomes 0x53 (S) * hangul letter pieup (ㅂ) 0x3142 becomes 0x42 (B) * upper-case gamma (Ɣ) 0x0194 becomes 0x94 (not printable) so gets replaced by "!" 
The result of this conversion for the GPT partition name is passed to user-space as PARTNAME via udev, which is confusing and feels questionable. However, there is a flaw in the conversion function itself. By dropping one byte of each character and using isprint() to check if the remaining byte corresponds to a printable character, we do not actually guarantee that the resulting character is 7-bit ASCII. This happens because we pass 8-bit characters to isprint(), which in the kernel returns 1 for many values > 0x7f - as defined in ctype.c. This results in many values which should be replaced by "!" to be kept as-is, despite not being valid 7-bit ASCII. Examples: * e with acute accent (é) 0x00E9 becomes 0xE9 - kept as-is because isprint(0xE9) returns 1. * euro sign (€) 0x20AC becomes 0xAC - kept as-is because isprint(0xAC) returns 1. This has broken the pyudev utility[1]; fix it by using a mask of 7 bits instead of 8 bits before calling isprint. Link: https://github.com/pyudev/pyudev/issues/490#issuecomment-2685794648 [1] Link: https://lore.kernel.org/linux-block/4cac90c2-e414-4ebb-ae62-2a4589d9dc6e@canonical.com/ Cc: Mulhern Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Signed-off-by: Olivier Gayot Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250305022154.3903128-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/partitions/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 5e9be13a56a82..7acba66eed481 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out) out[size] = 0; while (i < size) { - u8 c = le16_to_cpu(in[i]) & 0xff; + u8 c = le16_to_cpu(in[i]) & 0x7f; if (c && !isprint(c)) c = '!'; From 3be83ee9de0298f8321aa0b148d8f9995102e40f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 9 Dec 2024 15:08:53 +0100 Subject: [PATCH 235/503] ice: do not configure
destination override for switchdev After switchdev is enabled and disabled later, LLDP packets sending stops, despite working perfectly fine before and during switchdev state. To reproduce (creating/destroying VF is what triggers the reconfiguration): devlink dev eswitch set pci/
mode switchdev echo '2' > /sys/class/net//device/sriov_numvfs echo '0' > /sys/class/net//device/sriov_numvfs This happens because LLDP relies on the destination override functionality. It needs to 1) set a flag in the descriptor, 2) set the VSI permission to make it valid. The permissions are set when the PF VSI is first configured, but switchdev then enables it for the uplink VSI (which is always the PF) once more when configured and disables when deconfigured, which leads to software-generated LLDP packets being blocked. Do not modify the destination override permissions when configuring switchdev, as the enabled state is the default configuration that is never modified. Fixes: 1a1c40df2e80 ("ice: set and release switchdev environment") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ------ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ------------------ drivers/net/ethernet/intel/ice/ice_lib.h | 4 ---- 3 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index d649c197cf673..ed21d7f55ac11 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (vlan_ops->dis_rx_filtering(uplink_vsi)) goto err_vlan_filtering; - if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_uplink; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; @@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) err_up: ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); -err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); err_vlan_filtering: 
ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, @@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, ICE_FLTR_TX); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 38a1c8372180b..d0faa087793da 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3936,24 +3936,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); } -/** - * ice_vsi_ctx_set_allow_override - allow destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** - * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - /** * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit * @vsi: pointer to VSI structure diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index eabb35834a245..b4c9cb28a016e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -105,10 +105,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); int 
ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); From 3b4035ddbfc8e4521f85569998a7569668cccf51 Mon Sep 17 00:00:00 2001 From: Zecheng Li Date: Tue, 4 Mar 2025 21:40:31 +0000 Subject: [PATCH 236/503] sched/fair: Fix potential memory corruption in child_cfs_rq_on_list child_cfs_rq_on_list attempts to convert a 'prev' pointer to a cfs_rq. This 'prev' pointer can originate from struct rq's leaf_cfs_rq_list, making the conversion invalid and potentially leading to memory corruption. Depending on the relative positions of leaf_cfs_rq_list and the task group (tg) pointer within the struct, this can cause a memory fault or access garbage data. The issue arises in list_add_leaf_cfs_rq, where both cfs_rq->leaf_cfs_rq_list and rq->leaf_cfs_rq_list are added to the same leaf list. Also, rq->tmp_alone_branch can be set to rq->leaf_cfs_rq_list. This adds a check `if (prev == &rq->leaf_cfs_rq_list)` after the main conditional in child_cfs_rq_on_list. This ensures that the container_of operation will convert a correct cfs_rq struct. This check is sufficient because only cfs_rqs on the same CPU are added to the list, so verifying the 'prev' pointer against the current rq's list head is enough. Fixes a potential memory corruption issue that due to current struct layout might not be manifesting as a crash but could lead to unpredictable behavior when the layout changes. 
Fixes: fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling") Signed-off-by: Zecheng Li Reviewed-and-tested-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20250304214031.2882646-1-zecheng@google.com --- kernel/sched/fair.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c0ef435a7aae..c798d27952431 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4045,15 +4045,17 @@ static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq) { struct cfs_rq *prev_cfs_rq; struct list_head *prev; + struct rq *rq = rq_of(cfs_rq); if (cfs_rq->on_list) { prev = cfs_rq->leaf_cfs_rq_list.prev; } else { - struct rq *rq = rq_of(cfs_rq); - prev = rq->tmp_alone_branch; } + if (prev == &rq->leaf_cfs_rq_list) + return false; + prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list); return (prev_cfs_rq->tg->parent == cfs_rq->tg); From 23d97f18901ef5e4e264e3b1777fe65c760186b5 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Thu, 23 Jan 2025 09:15:39 +0100 Subject: [PATCH 237/503] ice: fix memory leak in aRFS after reset Fix aRFS (accelerated Receive Flow Steering) structures memory leak by adding a checker to verify if aRFS memory is already allocated while configuring VSI. aRFS objects are allocated in two cases: - as part of VSI initialization (at probe), and - as part of reset handling However, VSI reconfiguration executed during reset involves memory allocation one more time, without prior releasing already allocated resources. This led to the memory leak with the following signature: [root@os-delivery ~]# cat /sys/kernel/debug/kmemleak unreferenced object 0xff3c1ca7252e6000 (size 8192): comm "kworker/0:0", pid 8, jiffies 4296833052 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace (crc 0): [] __kmalloc_cache_noprof+0x275/0x340 [] ice_init_arfs+0x3a/0xe0 [ice] [] ice_vsi_cfg_def+0x607/0x850 [ice] [] ice_vsi_setup+0x5b/0x130 [ice] [] ice_init+0x1c1/0x460 [ice] [] ice_probe+0x2af/0x520 [ice] [] local_pci_probe+0x43/0xa0 [] work_for_cpu_fn+0x13/0x20 [] process_one_work+0x179/0x390 [] worker_thread+0x239/0x340 [] kthread+0xcc/0x100 [] ret_from_fork+0x2d/0x50 [] ret_from_fork_asm+0x1a/0x30 ... Fixes: 28bf26724fdb ("ice: Implement aRFS") Reviewed-by: Michal Swiatkowski Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 7cee365cc7d16..405ddd17de1bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), From dce97cb0a3e34204c0b99345418a714eac85953f Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Thu, 2 Jan 2025 20:07:52 +0100 Subject: [PATCH 238/503] ice: Fix switchdev slow-path in LAG Ever since removing switchdev control VSI and using PF for port representor Tx/Rx, switchdev slow-path has been working improperly after failover in SR-IOV LAG. LAG assumes that the first uplink to be added to the aggregate will own VFs and have switchdev configured. After failing-over to the other uplink, representors are still configured to Tx through the uplink they are set up on, which fails because that uplink is now down. On failover, update all PRs on primary uplink to use the currently active uplink for Tx. 
Call netif_keep_dst(), as the secondary uplink might not be in switchdev mode. Also make sure to call ice_eswitch_set_target_vsi() if uplink is in LAG. On the Rx path, representors are already working properly, because default Tx from VFs is set to PF owning the eswitch. After failover the same PF is receiving traffic from VFs, even though link is down. Fixes: defd52455aee ("ice: do Tx through PF netdev in slow-path") Reviewed-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 27 +++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 1ccb572ce285d..22371011c2492 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag) netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } +/** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. 
+ */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + /** * ice_lag_unlink - handle unlink event * @lag: LAG info struct @@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) ice_lag_move_vf_nodes(lag, act_port, pri_port); lag->primary = false; lag->active_port = ICE_LAG_INVALID_PORT; + + /* Config primary's eswitch back to normal operation. */ + ice_lag_config_eswitch(lag, lag->netdev); } else { struct ice_lag *primary_lag; @@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) ice_lag_move_vf_nodes(lag, prim_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); return; } @@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) /* new active port */ ice_lag_move_vf_nodes(lag, lag->active_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); } else { /* port not set as currently active (e.g. 
new active port * has already claimed the nodes and filters diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9c9ea4c1b93b7..380ba1e8b3b2c 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) + if ((ice_is_switchdev_running(vsi->back) || + ice_lag_is_switchdev_running(vsi->back)) && + vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { From 374c9faac5a763a05bc3f68ad9f73dab3c6aec90 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 26 Feb 2025 16:37:31 +0800 Subject: [PATCH 239/503] drm/amd/display: Fix null check for pipe_ctx->plane_state in resource_build_scaling_params Null pointer dereference issue could occur when pipe_ctx->plane_state is null. The fix adds a check to ensure 'pipe_ctx->plane_state' is not null before accessing. This prevents a null pointer dereference. Found by code review. 
Fixes: 3be5262e353b ("drm/amd/display: Rename more dc_surface stuff to plane_state") Reviewed-by: Alex Hung Signed-off-by: Ma Ke Signed-off-by: Alex Deucher (cherry picked from commit 63e6a77ccf239337baa9b1e7787cde9fa0462092) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 520a34a42827b..a45037cb4cc01 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1455,7 +1455,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Invalid input */ - if (!plane_state->dst_rect.width || + if (!plane_state || + !plane_state->dst_rect.width || !plane_state->dst_rect.height || !plane_state->src_rect.width || !plane_state->src_rect.height) { From 2a3e89a14864090ee4804fcec655ffc15fabf45c Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 19 Feb 2025 14:30:39 +0100 Subject: [PATCH 240/503] ice: register devlink prior to creating health reporters ice_health_init() was introduced in the commit 2a82874a3b7b ("ice: add Tx hang devlink health reporter"). The call to it should have been put after ice_devlink_register(). It went unnoticed until next reporter by Konrad, which receives events from FW. FW is reporting all events, also from prior driver load, and thus it is not unlikely to have something at the very beginning. And that results in a splat: [ 24.455950] ? devlink_recover_notify.constprop.0+0x198/0x1b0 [ 24.455973] devlink_health_report+0x5d/0x2a0 [ 24.455976] ? __pfx_ice_health_status_lookup_compare+0x10/0x10 [ice] [ 24.456044] ice_process_health_status_event+0x1b7/0x200 [ice] Do the analogous thing for deinit path.
Fixes: 85d6164ec56d ("ice: add fw and port health reporters") Reviewed-by: Aleksandr Loktionov Reviewed-by: Michal Swiatkowski Reviewed-by: Konrad Knitter Signed-off-by: Przemek Kitszel Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c3a0fb97c5ee4..e13bd5a6cb6c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5065,16 +5065,16 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); - ice_health_init(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { - ice_devlink_unregister(pf); ice_health_deinit(pf); + ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); } From fd617ea3b79d2116d53f76cdb5a3601c0ba6e42f Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Fri, 28 Feb 2025 11:26:48 -0500 Subject: [PATCH 241/503] drm/amdkfd: Fix NULL Pointer Dereference in KFD queue Through KFD IOCTL Fuzzing we encountered a NULL pointer dereference when calling kfd_queue_acquire_buffers.
Fixes: 629568d25fea ("drm/amdkfd: Validate queue cwsr area and eop buffer size") Signed-off-by: Andrew Martin Reviewed-by: Philip Yang Signed-off-by: Andrew Martin Signed-off-by: Alex Deucher (cherry picked from commit 049e5bf3c8406f87c3d8e1958e0a16804fa1d530) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ecccd7adbab4d..24396a2c77bd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -266,8 +266,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", - properties->eop_buf_bo->tbo.base.size, + pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; goto out_err_unreserve; From c27c66afc449b80f3b4b84d123358c0248f2cf63 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Mar 2025 07:08:09 -1000 Subject: [PATCH 242/503] fs/pipe: Fix pipe_occupancy() with 16-bit indexes The pipe_occupancy() logic implicitly relied on the natural unsigned modulo arithmetic in C, but that doesn't work for the new 'pipe_index_t' case, since any arithmetic will be done in 'int' (and here we had also made it 'unsigned int' due to the function call boundary). So make the modulo arithmetic explicit by casting the result to the proper type. 
Cc: Oleg Nesterov Cc: Mateusz Guzik Cc: Manfred Spraul Cc: Christian Brauner Cc: Swapnil Sapkal Cc: Alexey Gladkov Cc: K Prateek Nayak Link: https://lore.kernel.org/all/CAHk-=wjyHsGLx=rxg6PKYBNkPYAejgo7=CbyL3=HGLZLsAaJFQ@mail.gmail.com/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3cc4f8eab853f..1f013ed7577ef 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -192,7 +192,7 @@ static inline bool pipe_empty(unsigned int head, unsigned int tail) */ static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head - tail; + return (pipe_index_t)(head - tail); } /** From cfced12f5100e50d56bc587299393fd33c1169a9 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Wed, 5 Mar 2025 11:23:01 +0000 Subject: [PATCH 243/503] include/linux/pipe_fs_i: Add htmldoc annotation for "head_tail" member Add htmldoc annotation for the newly introduced "head_tail" member describing it to be a union of the pipe_inode_info's @head and @tail members. 
Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20250305204609.5e64768e@canb.auug.org.au/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Signed-off-by: K Prateek Nayak Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 1f013ed7577ef..05ccbc5d01294 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -65,6 +65,7 @@ union pipe_index { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) From 0eba2a7e858907a746ba69cd002eb9eb4dbd7bf3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2025 15:14:56 +0000 Subject: [PATCH 244/503] ASoC: ops: Consistently treat platform_max as control value This reverts commit 9bdd10d57a88 ("ASoC: ops: Shift tested values in snd_soc_put_volsw() by +min"), and makes some additional related updates. There are two ways the platform_max could be interpreted; the maximum register value, or the maximum value the control can be set to. The patch moved from treating the value as a control value to a register one. When the patch was applied it was technically correct as snd_soc_limit_volume() also used the register interpretation. However, even then most of the other usages treated platform_max as a control value, and snd_soc_limit_volume() has since been updated to also do so in commit fb9ad24485087 ("ASoC: ops: add correct range check for limiting volume"). That patch however, missed updating snd_soc_put_volsw() back to the control interpretation, and fixing snd_soc_info_volsw_range(). 
The control interpretation makes more sense as limiting is typically done from the machine driver, so it is appropriate to use the customer facing representation rather than the internal codec representation. Update all the code to consistently use this interpretation of platform_max. Finally, also add some comments to the soc_mixer_control struct to hopefully avoid further patches switching between the two approaches. Fixes: fb9ad24485087 ("ASoC: ops: add correct range check for limiting volume") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250228151456.3703342-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc.h | 5 ++++- sound/soc/soc-ops.c | 15 +++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index fcdb5adfcd5ec..b3e84bc47c6fd 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1261,7 +1261,10 @@ void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd); /* mixer control */ struct soc_mixer_control { - int min, max, platform_max; + /* Minimum and maximum specified as written to the hardware */ + int min, max; + /* Limited maximum value specified as presented through the control */ + int platform_max; int reg, rreg; unsigned int shift, rshift; unsigned int sign_bit; diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 19928f098d8dc..b0e4e4168f38d 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -337,7 +337,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (ucontrol->value.integer.value[0] < 0) return -EINVAL; val = ucontrol->value.integer.value[0]; - if (mc->platform_max && ((int)val + min) > mc->platform_max) + if (mc->platform_max && val > mc->platform_max) return -EINVAL; if (val > max - min) return -EINVAL; @@ -350,7 +350,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (ucontrol->value.integer.value[1] < 0) return -EINVAL; val2 = ucontrol->value.integer.value[1]; - if 
(mc->platform_max && ((int)val2 + min) > mc->platform_max) + if (mc->platform_max && val2 > mc->platform_max) return -EINVAL; if (val2 > max - min) return -EINVAL; @@ -503,17 +503,16 @@ int snd_soc_info_volsw_range(struct snd_kcontrol *kcontrol, { struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; - int platform_max; - int min = mc->min; + int max; - if (!mc->platform_max) - mc->platform_max = mc->max; - platform_max = mc->platform_max; + max = mc->max - mc->min; + if (mc->platform_max && mc->platform_max < max) + max = mc->platform_max; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = snd_soc_volsw_is_stereo(mc) ? 2 : 1; uinfo->value.integer.min = 0; - uinfo->value.integer.max = platform_max - min; + uinfo->value.integer.max = max; return 0; } From 29ffeb73b216ce3eff10229eb077cf9b7812119d Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Mon, 17 Jun 2019 23:46:27 +0200 Subject: [PATCH 245/503] drm/radeon: Fix rs400_gpu_init for ATI mobility radeon Xpress 200M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit num_gb_pipes was set to a wrong value using r420_pipe_config. This has led to HyperZ glitches on fast Z clearing.
Closes: https://bugs.freedesktop.org/show_bug.cgi?id=110897 Reviewed-by: Marek Olšák Signed-off-by: Richard Thier Signed-off-by: Alex Deucher (cherry picked from commit 044e59a85c4d84e3c8d004c486e5c479640563a6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r300.c | 3 ++- drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/rs400.c | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 05c13102a8cb8..d22889fbfa9c8 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -359,7 +359,8 @@ int r300_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -static void r300_gpu_init(struct radeon_device *rdev) +/* rs400_gpu_init also calls this! */ +void r300_gpu_init(struct radeon_device *rdev) { uint32_t gb_tile_config, tmp; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 1e00f6b99f94b..8f5e07834fcc6 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -165,6 +165,7 @@ void r200_set_safe_registers(struct radeon_device *rdev); */ extern int r300_init(struct radeon_device *rdev); extern void r300_fini(struct radeon_device *rdev); +extern void r300_gpu_init(struct radeon_device *rdev); extern int r300_suspend(struct radeon_device *rdev); extern int r300_resume(struct radeon_device *rdev); extern int r300_asic_reset(struct radeon_device *rdev, bool hard); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d6c18fd740ec6..13cd0a688a65c 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -256,8 +256,22 @@ int rs400_mc_wait_for_idle(struct radeon_device *rdev) static void rs400_gpu_init(struct radeon_device *rdev) { - /* FIXME: is this correct ? */ - r420_pipes_init(rdev); + /* Earlier code was calling r420_pipes_init and then + * rs400_mc_wait_for_idle(rdev). 
The problem is that + * at least on my Mobility Radeon Xpress 200M RC410 card + * that ends up in this code path ends up num_gb_pipes == 3 + * while the card seems to have only one pipe. With the + * r420 pipe initialization method. + * + * Problems shown up as HyperZ glitches, see: + * https://bugs.freedesktop.org/show_bug.cgi?id=110897 + * + * Delegating initialization to r300 code seems to work + * and results in proper pipe numbers. The rs400 cards + * are said to be not r400, but r300 kind of cards. + */ + r300_gpu_init(rdev); + if (rs400_mc_wait_for_idle(rdev)) { pr_warn("rs400: Failed to wait MC idle while programming pipes. Bad things might happen. %08x\n", RREG32(RADEON_MC_STATUS)); From da552bda987420e877500fdd90bd0172e3bf412b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Feb 2025 17:02:11 +0800 Subject: [PATCH 246/503] drm/amd/pm: always allow ih interrupt from fw always allow ih interrupt from fw on smu v14 based on the interface requirement Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit a3199eba46c54324193607d9114a1e321292d7a1) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 9b2f4fe1578b8..ddb6444406d28 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1895,16 +1895,6 @@ static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu) NULL); } -static int smu_v14_0_process_pending_interrupt(struct smu_context *smu) -{ - int ret = 0; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) - ret = smu_v14_0_allow_ih_interrupt(smu); - - return ret; -} - int smu_v14_0_enable_thermal_alert(struct smu_context *smu) { int ret = 0; @@ -1916,7 +1906,7 @@ int smu_v14_0_enable_thermal_alert(struct 
smu_context *smu) if (ret) return ret; - return smu_v14_0_process_pending_interrupt(smu); + return smu_v14_0_allow_ih_interrupt(smu); } int smu_v14_0_disable_thermal_alert(struct smu_context *smu) From 0d2d0f3d93ddd6556f23c917d910becd9925ddeb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Mar 2025 07:35:40 -1000 Subject: [PATCH 247/503] fs/pipe: remove buggy and unused 'helper' function While looking for incorrect users of the pipe head/tail fields (see commit c27c66afc449: "fs/pipe: Fix pipe_occupancy() with 16-bit indexes"), I found a bug in pipe_discard_from() that looked entirely broken. However, the fix is trivial: this buggy function isn't actually called by anything, so let's just remove it ASAP. Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 05ccbc5d01294..e572e6fc4f81f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -281,15 +281,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } -static inline void pipe_discard_from(struct pipe_inode_info *pipe, - unsigned int old_head) -{ - unsigned int mask = pipe->ring_size - 1; - - while (pipe->head > old_head) - pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); -} - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE From 528361c49962708a60f51a1afafeb00987cebedf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Mar 2025 18:52:59 +0300 Subject: [PATCH 248/503] nvme-tcp: fix signedness bug in nvme_tcp_init_connection() The kernel_recvmsg() function returns an int which could be either negative error codes or the number of bytes received. 
The problem is that the condition: if (ret < sizeof(*icresp)) { is type promoted to type unsigned long and negative values are treated as high positive values which is success, when they should be treated as failure. Handle invalid positive returns separately from negative error codes to avoid this problem. Fixes: 578539e09690 ("nvme-tcp: fix connect failure on receiving partial ICResp PDU") Signed-off-by: Dan Carpenter Reviewed-by: Caleb Sander Mateos Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 23f11527d29d0..327f3f2f5399c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1521,11 +1521,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < sizeof(*icresp)) { + if (ret >= 0 && ret < sizeof(*icresp)) + ret = -ECONNRESET; + if (ret < 0) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); - if (ret >= 0) - ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; From f2c11231b57b5163bf16cdfd65271d53d61dd996 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:45 +0000 Subject: [PATCH 249/503] ALSA: hda/realtek: Add support for ASUS ROG Strix G814 Laptop using CS35L41 HDA Add support for ASUS G814PH/PM/PP and G814FH/FM/FP. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C. 
Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-2-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ca457e7ca9dd..e7612ab492172 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,6 +10784,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), From 16dc157346dd4404b02b42e73b88604be3652039 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:46 +0000 Subject: [PATCH 250/503] ALSA: hda/realtek: Add support for ASUS ROG Strix GA603 Laptops using CS35L41 HDA Add support for ASUS GA603KP, GA603KM and GA603KH. 
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-3-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e7612ab492172..aa9b6e474f69b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,6 +10784,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3d78, "ASUS GA603KH", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3d88, "ASUS GA603KM", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), From 9120b2b4ad0dad2f6bbb6bcacd0456f806fda62d Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:47 +0000 Subject: [PATCH 251/503] ALSA: hda/realtek: Add support for ASUS ROG Strix G614 Laptops using CS35L41 HDA Add support for ASUS G614PH/PM/PP and G614FH/FM/FP. 
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-4-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aa9b6e474f69b..c2ebcd2958c3f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10676,7 +10676,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), + SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1074, "ASUS G614PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x10a4, "ASUS TP3407SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From 859a11917001424776e1cca02b762efcabb4044e Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:48 +0000 Subject: [PATCH 252/503] ALSA: hda/realtek: Add support for various ASUS Laptops using CS35L41 HDA Add support for ASUS B3405CVA, B5405CVA, B5605CVA, B3605CVA. 
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-5-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c2ebcd2958c3f..bf89900ec5f50 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10692,6 +10692,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1294, "ASUS B3405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), @@ -10779,6 +10780,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f63, "ASUS P5405CSA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), @@ -10797,6 +10799,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", 
ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3fd0, "ASUS B3605CVA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3ff0, "ASUS B5405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), From 7ab61d0a9a35e32497bcf2233310fec79ee3338f Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:49 +0000 Subject: [PATCH 253/503] ALSA: hda/realtek: Add support for ASUS B3405 and B3605 Laptops using CS35L41 HDA Add support for ASUS B3405CCA / P3405CCA, B3605CCA / P3605CCA, B3405CCA, B3605CCA. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-6-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf89900ec5f50..53d2a267d7989 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10696,6 +10696,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10782,7 +10783,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 
0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), + SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), From c86dd79a7c338fff9bebb9503857e07db9845eca Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:50 +0000 Subject: [PATCH 254/503] ALSA: hda/realtek: Add support for ASUS B5405 and B5605 Laptops using CS35L41 HDA Add support for ASUS B5605CCA and B5405CCA. 
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-7-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53d2a267d7989..f67ab69c9997c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,8 +10784,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), + SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), From 8463d2adbe1901247937fcdfe4b525130f6db10b Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:51 +0000 Subject: [PATCH 255/503] ALSA: hda/realtek: Add support for ASUS Zenbook UM3406KA Laptops using CS35L41 HDA Laptop uses 2 CS35L41 Amps with HDA, using External boost with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-8-sbinding@opensource.cirrus.com --- 
sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f67ab69c9997c..d2a1f836dbbf7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10686,6 +10686,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x1154, "ASUS TP3607SH", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), From f2052a4a62465c0037aef7ea7426bffdb3531e41 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Mon, 3 Mar 2025 13:11:21 +0000 Subject: [PATCH 256/503] clk: samsung: gs101: fix synchronous external abort in samsung_clk_save() EARLY_WAKEUP_SW_TRIG_*_SET and EARLY_WAKEUP_SW_TRIG_*_CLEAR registers are only writeable. Attempting to read these registers during samsung_clk_save() causes a synchronous external abort. Remove these 8 registers from cmu_top_clk_regs[] array so that system suspend gets further. 
Note: the code path can be exercised using the following command: echo mem > /sys/power/state Fixes: 2c597bb7d66a ("clk: samsung: clk-gs101: Add cmu_top, cmu_misc and cmu_apm support") Signed-off-by: Peter Griffin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250303-clk-suspend-fix-v1-1-c2edaf66260f@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-gs101.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c index 86b39edba1227..08b867ae3ed9d 100644 --- a/drivers/clk/samsung/clk-gs101.c +++ b/drivers/clk/samsung/clk-gs101.c @@ -382,17 +382,9 @@ static const unsigned long cmu_top_clk_regs[] __initconst = { EARLY_WAKEUP_DPU_DEST, EARLY_WAKEUP_CSIS_DEST, EARLY_WAKEUP_SW_TRIG_APM, - EARLY_WAKEUP_SW_TRIG_APM_SET, - EARLY_WAKEUP_SW_TRIG_APM_CLEAR, EARLY_WAKEUP_SW_TRIG_CLUSTER0, - EARLY_WAKEUP_SW_TRIG_CLUSTER0_SET, - EARLY_WAKEUP_SW_TRIG_CLUSTER0_CLEAR, EARLY_WAKEUP_SW_TRIG_DPU, - EARLY_WAKEUP_SW_TRIG_DPU_SET, - EARLY_WAKEUP_SW_TRIG_DPU_CLEAR, EARLY_WAKEUP_SW_TRIG_CSIS, - EARLY_WAKEUP_SW_TRIG_CSIS_SET, - EARLY_WAKEUP_SW_TRIG_CSIS_CLEAR, CLK_CON_MUX_MUX_CLKCMU_BO_BUS, CLK_CON_MUX_MUX_CLKCMU_BUS0_BUS, CLK_CON_MUX_MUX_CLKCMU_BUS1_BUS, From 53517a70873c7a91675f7244768aad5006cc45de Mon Sep 17 00:00:00 2001 From: Varada Pavani Date: Tue, 25 Feb 2025 18:49:18 +0530 Subject: [PATCH 257/503] clk: samsung: update PLL locktime for PLL142XX used on FSD platform Currently PLL142XX locktime is 270. As per spec, it should be 150. Hence update PLL142XX controller locktime to 150. 
Cc: stable@vger.kernel.org Fixes: 4f346005aaed ("clk: samsung: fsd: Add initial clock support") Signed-off-by: Varada Pavani Link: https://lore.kernel.org/r/20250225131918.50925-3-v.pavani@samsung.com Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-pll.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c index 2e94bba6c3966..023a25af73c47 100644 --- a/drivers/clk/samsung/clk-pll.c +++ b/drivers/clk/samsung/clk-pll.c @@ -206,6 +206,7 @@ static const struct clk_ops samsung_pll3000_clk_ops = { */ /* Maximum lock time can be 270 * PDIV cycles */ #define PLL35XX_LOCK_FACTOR (270) +#define PLL142XX_LOCK_FACTOR (150) #define PLL35XX_MDIV_MASK (0x3FF) #define PLL35XX_PDIV_MASK (0x3F) @@ -272,7 +273,11 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate, } /* Set PLL lock time. */ - writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR, + if (pll->type == pll_142xx) + writel_relaxed(rate->pdiv * PLL142XX_LOCK_FACTOR, + pll->lock_reg); + else + writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR, pll->lock_reg); /* Change PLL PMS values */ From e775e2a060d99180edc5366fb9f4299d0f07b66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:55 +0100 Subject: [PATCH 258/503] drm/xe/vm: Validate userptr during gpu vma prefetching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a userptr vma subject to prefetching was already invalidated or invalidated during the prefetch operation, the operation would repeatedly return -EAGAIN which would typically cause an infinite loop. Validate the userptr to ensure this doesn't happen. 
v2: - Don't fallthrough from UNMAP to PREFETCH (Matthew Brost) Fixes: 5bd24e78829a ("drm/xe/vm: Subclass userptr vmas") Fixes: 617eebb9c480 ("drm/xe: Fix array of binds") Cc: Matthew Brost Cc: # v6.9+ Suggested-by: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 03c346d4d0d85d210d549d43c8cfb3dfb7f20e0a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 30259eba450b5..d2cd227e4d694 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2286,8 +2286,17 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, break; } case DRM_GPUVA_OP_UNMAP: + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + break; case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ + vma = gpuva_to_vma(op->base.prefetch.va); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (err) + return err; + } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: From 1414d95d5805b1dc221d22db9b8dc5287ef083bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:56 +0100 Subject: [PATCH 259/503] drm/xe/vm: Fix a misplaced #endif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a (harmless) misplaced #endif leading to declarations appearing multiple times. 
Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr") Cc: Maarten Lankhorst Cc: José Roberto de Souza Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-3-thomas.hellstrom@linux.intel.com (cherry picked from commit fcc20a4c752214b3e25632021c57d7d1d71ee1dd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 23adb74428815..256a837c2704a 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -274,9 +274,9 @@ static inline void vm_dbg(const struct drm_device *dev, const char *format, ...) { /* noop */ } #endif -#endif struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +#endif From 84211b1c0db6b9dbe0020fa97192fb9661617f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:57 +0100 Subject: [PATCH 260/503] drm/xe: Fix fault mode invalidation with unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix fault mode invalidation racing with unbind leading to the PTE zapping potentially traversing an invalid page-table tree. Do this by holding the notifier lock across PTE zapping. This might transfer any contention waiting on the notifier seqlock read side to the notifier lock read side, but that shouldn't be a major problem. At the same time get rid of the open-coded invalidation in the bind code by relying on the notifier even when the vma bind is not yet committed. Finally let userptr invalidation call a dedicated xe_vm function performing a full invalidation. 
Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Cc: Thomas Hellström Cc: Matthew Brost Cc: Matthew Auld Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 100a5b8dadfca50d91d9a4c9fc01431b42a25cab) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 38 ++++---------- drivers/gpu/drm/xe/xe_vm.c | 85 +++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 8 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +- 4 files changed, 75 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1ddcc7e79a93e..12a627a23eb45 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1213,42 +1213,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; + if (xe_pt_userptr_inject_eagain(uvma)) + xe_vma_userptr_force_invalidate(uvma); - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; + notifier_seq = uvma->userptr.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) + notifier_seq)) return 0; - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm)) return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, 
MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } + /* + * Just continue the operation since exec or rebind worker + * will take care of rebinding. + */ return 0; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d2cd227e4d694..d54aaa5eaff38 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -579,51 +579,26 @@ static void preempt_rebind_work_func(struct work_struct *w) trace_xe_vm_rebind_worker_exit(vm); } -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - /* * Tell exec and rebind worker they need to repin and rebind this * userptr. 
*/ if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } - up_write(&vm->userptr.notifier_lock); - /* * Preempt fences turn into schedule disables, pipeline these. * Note that even in fault mode, we need to wait for binds and @@ -641,11 +616,35 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } +} +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->userptr.notifier_lock); trace_xe_vma_userptr_invalidate_complete(vma); return true; @@ -655,6 +654,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { .invalidate = vma_userptr_invalidate, }; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. 
+ */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->userptr.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq)) + uvma->userptr.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 256a837c2704a..b882bfb31bd05 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -279,4 +279,12 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7f9a303e51d89..d2511819cdf43 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -227,8 +227,8 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires the @userptr.notifer_lock in - * write mode. + * items to the list requires either the @userptr.notifer_lock in + * write mode, OR @lock in write mode. 
*/ struct list_head invalidated; } userptr; From ae482ec8cd1a85bde3307f71921a7780086fbec0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 28 Feb 2025 08:30:58 +0100 Subject: [PATCH 261/503] drm/xe: Add staging tree for VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concurrent VM bind staging and zapping of PTEs from a userptr notifier do not work because the view of PTEs is not stable. VM binds cannot acquire the notifier lock during staging, as memory allocations are required. To resolve this race condition, use a staging tree for VM binds that is committed only under the userptr notifier lock during the final step of the bind. This ensures a consistent view of the PTEs in the userptr notifier. A follow up may only use staging for VM in fault mode as this is the only mode in which the above race exists. v3: - Drop zap PTE change (Thomas) - s/xe_pt_entry/xe_pt_entry_staging (Thomas) Suggested-by: Thomas Hellström Cc: Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-5-thomas.hellstrom@linux.intel.com Signed-off-by: Thomas Hellström (cherry picked from commit 6f39b0c5ef0385eae586760d10b9767168037aa5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 58 +++++++++++++++++++++++---------- drivers/gpu/drm/xe/xe_pt_walk.c | 3 +- drivers/gpu/drm/xe/xe_pt_walk.h | 4 +++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 12a627a23eb45..dc24baa840924 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -28,6 +28,8 @@ struct xe_pt_dir { struct xe_pt pt; /** @children: Array of page-table child nodes */ struct xe_ptw *children[XE_PDES]; + /** @staging: Array of page-table staging 
nodes */ + struct xe_ptw *staging[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -48,9 +50,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) return container_of(pt, struct xe_pt_dir, pt); } -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +static struct xe_pt * +xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->children[index], struct xe_pt, base); + return container_of(pt_dir->staging[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -125,6 +128,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, } pt->bo = bo; pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; + pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -206,8 +210,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + if (xe_pt_entry_staging(pt_dir, i)) + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, deferred); } } @@ -376,8 +380,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, /* Continue building a non-connected subtree. 
*/ struct iosys_map *map = &parent->bo->vmap; - if (unlikely(xe_child)) + if (unlikely(xe_child)) { parent->base.children[offset] = &xe_child->base; + parent->base.staging[offset] = &xe_child->base; + } xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -614,6 +620,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_bind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .vm = xe_vma_vm(vma), .tile = tile, @@ -873,7 +880,7 @@ static void xe_pt_cancel_bind(struct xe_vma *vma, } } -static void xe_pt_commit_locks_assert(struct xe_vma *vma) +static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -885,6 +892,16 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_vm_assert_held(vm); } +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + xe_pt_commit_prepare_locks_assert(vma); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); +} + static void xe_pt_commit(struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 num_entries, struct llist_head *deferred) @@ -895,13 +912,17 @@ static void xe_pt_commit(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; if (!pt->level) continue; + pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + int j_ = j + entries[i].ofs; + pt_dir->children[j_] = pt_dir->staging[j_]; xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); } } @@ -913,7 +934,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_pt *pt = entries[i].pt; @@ -928,10 +949,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = 
0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); struct xe_pt *oldpte = entries[i].pt_entries[j].pt; - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } } @@ -943,7 +964,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, { u32 i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; @@ -961,10 +982,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, struct xe_pt *newpte = entries[i].pt_entries[j].pt; struct xe_pt *oldpte = NULL; - if (xe_pt_entry(pt_dir, j_)) - oldpte = xe_pt_entry(pt_dir, j_); + if (xe_pt_entry_staging(pt_dir, j_)) + oldpte = xe_pt_entry_staging(pt_dir, j_); - pt_dir->children[j_] = &newpte->base; + pt_dir->staging[j_] = &newpte->base; entries[i].pt_entries[j].pt = oldpte; } } @@ -1494,6 +1515,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_unbind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .tile = tile, .modified_start = xe_vma_start(vma), @@ -1535,7 +1557,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1548,7 +1570,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, continue; for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) - pt_dir->children[j] = + pt_dir->staging[j] = entries[i].pt_entries[j - entry->ofs].pt ? 
&entries[i].pt_entries[j - entry->ofs].pt->base : NULL; } @@ -1561,7 +1583,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; ++i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1575,8 +1597,8 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { entry->pt_entries[j - entry->ofs].pt = - xe_pt_entry(pt_dir, j); - pt_dir->children[j] = NULL; + xe_pt_entry_staging(pt_dir, j); + pt_dir->staging[j] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index b8b3d2aea4923..be602a763ff32 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,8 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->children ? parent->children : NULL; + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : + (parent->children ?: NULL); const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 5ecc4d2f0f653..5c02c244f7de3 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -11,12 +11,14 @@ /** * struct xe_ptw - base class for driver pagetable subclassing. * @children: Pointer to an array of children if any. + * @staging: Pointer to an array of staging if any. * * Drivers could subclass this, and if it's a page-directory, typically * embed an array of xe_ptw pointers. */ struct xe_ptw { struct xe_ptw **children; + struct xe_ptw **staging; }; /** @@ -41,6 +43,8 @@ struct xe_pt_walk { * as shared pagetables. 
*/ bool shared_pt_mode; + /** @staging: Walk staging PT structure */ + bool staging; }; /** From e3e2e7fc4cd8414c9a966ef1b344db543f8614f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:40 +0100 Subject: [PATCH 262/503] drm/xe/hmm: Style- and include fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add proper #ifndef around the xe_hmm.h header, proper spacing and since the documentation mostly follows kerneldoc format, make it kerneldoc. Also prepare for upcoming -stable fixes. Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-2-thomas.hellstrom@linux.intel.com (cherry picked from commit bbe2b06b55bc061c8fcec034ed26e88287f39143) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 9 +++------ drivers/gpu/drm/xe/xe_hmm.h | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2e4ae61567d8d..6ddcf88d8a393 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,11 +19,10 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/* +/** * xe_mark_range_accessed() - mark a range is accessed, so core mm * have such information for memory eviction or write back to * hard disk - * * @range: the range to mark * @write: if write to this range, we mark pages in this range * as dirty @@ -43,11 +42,10 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) } } -/* +/** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table * and will be used to program GPU page table later. 
- * @xe: the xe device who will access the dma-address in sg table * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. @@ -112,9 +110,8 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, return ret; } -/* +/** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * * @uvma: the userptr vma which hold the scatter gather table * * With function xe_userptr_populate_range, we allocate storage of diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 909dc2bdcd97e..9602cb7d976dd 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -3,9 +3,14 @@ * Copyright © 2024 Intel Corporation */ +#ifndef _XE_HMM_H_ +#define _XE_HMM_H_ + #include struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); + void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); +#endif From 0a98219bcc961edd3388960576e4353e123b4a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:41 +0100 Subject: [PATCH 263/503] drm/xe/hmm: Don't dereference struct page pointers without notifier lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pfns that we obtain from hmm_range_fault() point to pages that we don't have a reference on, and the guarantee that they are still in the cpu page-tables is that the notifier lock must be held and the notifier seqno is still valid. So while building the sg table and marking the pages accessed / dirty we need to hold this lock with a validated seqno. However, the lock is reclaim tainted which makes sg_alloc_table_from_pages_segment() unusable, since it internally allocates memory. Instead build the sg-table manually. For the non-iommu case this might lead to fewer coalesces, but if that's a problem it can be fixed up later in the resource cursor code.
For the iommu case, the whole sg-table may still be coalesced to a single contiguous device va region. This avoids marking pages that we don't own dirty and accessed, and it also avoids dereferencing struct pages that we don't own. v2: - Use assert to check whether hmm pfns are valid (Matthew Auld) - Take into account that large pages may cross range boundaries (Matthew Auld) v3: - Don't unnecessarily check for a non-freed sg-table. (Matthew Auld) - Add a missing up_read() in an error path. (Matthew Auld) Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-3-thomas.hellstrom@linux.intel.com (cherry picked from commit ea3e66d280ce2576664a862693d1da8fd324c317) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 112 +++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 6ddcf88d8a393..be284b852307e 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -42,6 +42,42 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) } } +static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, + struct hmm_range *range, struct rw_semaphore *notifier_sem) +{ + unsigned long i, npages, hmm_pfn; + unsigned long num_chunks = 0; + int ret; + + /* HMM docs says this is needed.
*/ + ret = down_read_interruptible(notifier_sem); + if (ret) + return ret; + + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + up_read(notifier_sem); + return -EAGAIN; + } + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages;) { + unsigned long len; + + hmm_pfn = range->hmm_pfns[i]; + xe_assert(xe, hmm_pfn & HMM_PFN_VALID); + + len = 1UL << hmm_pfn_to_map_order(hmm_pfn); + + /* If order > 0 the page may extend beyond range->start */ + len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); + i += len; + num_chunks++; + } + up_read(notifier_sem); + + return sg_alloc_table(st, num_chunks, GFP_KERNEL); +} + /** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table @@ -50,6 +86,7 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. * @st: pointer to the sg table. + * @notifier_sem: The xe notifier lock. * @write: whether we write to this range. This decides dma map direction * for system pages. 
If write we map it bi-diretional; otherwise * DMA_TO_DEVICE @@ -76,38 +113,41 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * Returns 0 if successful; -ENOMEM if fails to allocate memory */ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, bool write) + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) { + unsigned long npages = xe_npages_in_range(range->start, range->end); struct device *dev = xe->drm.dev; - struct page **pages; - u64 i, npages; - int ret; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; - npages = xe_npages_in_range(range->start, range->end); - pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + lockdep_assert_held(notifier_sem); - for (i = 0; i < npages; i++) { - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - xe_assert(xe, !is_device_private_page(pages[i])); - } + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; - ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, - xe_sg_segment_size(dev), GFP_KERNEL); - if (ret) - goto free_pages; + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; - ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(st); - st = NULL; + if (unlikely(j == st->nents - 1)) { + if (i > npages) + size -= (i - npages); + sg_mark_end(sgl); + } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } + xe_assert(xe, i == npages); -free_pages: - kvfree(pages); - return ret; + return dma_map_sgtable(dev, st, write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } /** @@ -237,16 +277,36 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto free_pfns; - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); if (ret) goto free_pfns; + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); free_pfns: kvfree(pfns); return ret; } - From 333b8906336174478efbbfc1e24a89e3397ffe65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:42 +0100 Subject: [PATCH 264/503] drm/xe/userptr: Unmap userptrs in the mmu notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If userptr pages are freed after a call to the xe mmu notifier, the device will not be blocked out from theoretically accessing these pages unless they are also unmapped from the iommu, and this violates some aspects of the iommu-imposed security. Ensure that userptrs are unmapped in the mmu notifier to mitigate this. A naive attempt would try to free the sg table, but the sg table itself may be accessed by a concurrent bind operation, so settle for only unmapping. v3: - Update lockdep asserts. 
- Fix a typo (Matthew Auld) Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: Matthew Auld Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-4-thomas.hellstrom@linux.intel.com (cherry picked from commit ba767b9d01a2c552d76cf6f46b125d50ec4147a6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 51 ++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_hmm.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 4 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +++ 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index be284b852307e..392102515f3d8 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -150,6 +150,45 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. 
*/ + lockdep_assert_held(&vm->lock); + } + + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); +} + /** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr * @uvma: the userptr vma which hold the scatter gather table @@ -161,16 +200,9 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) { struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - struct device *dev = xe->drm.dev; - - xe_assert(xe, userptr->sg); - dma_unmap_sgtable(dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); sg_free_table(userptr->sg); userptr->sg = NULL; } @@ -297,6 +329,7 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; up_read(&vm->userptr.notifier_lock); kvfree(pfns); diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 9602cb7d976dd..0ea98d8e7bbc7 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -13,4 +13,6 @@ struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d54aaa5eaff38..ec6ec18ab3faa 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -620,6 +620,8 @@ static void 
__vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } + + xe_hmm_userptr_unmap(uvma); } static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, @@ -1039,6 +1041,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&userptr->invalidate_link); INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; + mutex_init(&userptr->unmap_mutex); err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, @@ -1080,6 +1083,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); + mutex_destroy(&userptr->unmap_mutex); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index d2511819cdf43..a4b4091cfd0da 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -59,12 +59,16 @@ struct xe_userptr { struct sg_table *sg; /** @notifier_seq: notifier sequence number */ unsigned long notifier_seq; + /** @unmap_mutex: Mutex protecting dma-unmapping */ + struct mutex unmap_mutex; /** * @initial_bind: user pointer has been bound at least once. * write: vm->userptr.notifier_lock in read mode and vm->resv held. * read: vm->userptr.notifier_lock in write mode or vm->resv held. */ bool initial_bind; + /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ + bool mapped; #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) u32 divisor; #endif From bcb0fda3c2da9fe4721d3e73d80e778c038e7d27 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Mar 2025 14:03:34 -0700 Subject: [PATCH 265/503] io_uring/rw: ensure reissue path is correctly handled for IOPOLL The IOPOLL path posts CQEs when the io_kiocb is marked as completed, so it cannot rely on the usual retry that non-IOPOLL requests do for read/write requests. 
If -EAGAIN is received and the request should be retried, go through the normal completion path and let the normal flush logic catch it and reissue it, like what is done for !IOPOLL reads or writes. Fixes: d803d123948f ("io_uring/rw: handle -EAGAIN retry at IO completion time") Reported-by: John Garry Link: https://lore.kernel.org/io-uring/2b43ccfa-644d-4a09-8f8f-39ad71810f41@oracle.com/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 9edc6baebd01c..e5528cebcd066 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -560,11 +560,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; - return; - } - req->cqe.res = res; + else + req->cqe.res = res; } /* order with io_iopoll_complete() checking ->iopoll_completed */ From df08c94baafb001de6cf44bb7098bb557f36c335 Mon Sep 17 00:00:00 2001 From: Nicklas Bo Jensen Date: Thu, 27 Feb 2025 13:32:34 +0000 Subject: [PATCH 266/503] netfilter: nf_conncount: garbage collection is not skipped when jiffies wrap around nf_conncount is supposed to skip garbage collection if it has already run garbage collection in the same jiffy. Unfortunately, this is broken when jiffies wrap around which this patch fixes. The problem is that last_gc in the nf_conncount_list struct is an u32, but jiffies is an unsigned long which is 8 bytes on my systems. When those two are compared it only works until last_gc wraps around. See bug report: https://bugzilla.netfilter.org/show_bug.cgi?id=1778 for more details. 
Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Nicklas Bo Jensen Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 4890af4dc263f..ebe38ed2e6f4f 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; - if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + if ((u32)jiffies == list->last_gc) goto add_new_node; /* check the saved connections */ @@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, bool ret = false; /* don't bother if we just did GC */ - if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + if ((u32)jiffies == READ_ONCE(list->last_gc)) return false; /* don't bother if other cpu is already doing GC */ From 374908a15af4cd60862ebc51a6e012ace2212c76 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Mar 2025 18:10:30 +0100 Subject: [PATCH 267/503] rust: remove leftover mentions of the `alloc` crate In commit 392e34b6bc22 ("kbuild: rust: remove the `alloc` crate and `GlobalAlloc`") we stopped using the upstream `alloc` crate. Thus remove a few leftover mentions treewide. 
Cc: stable@vger.kernel.org # Also to 6.12.y after the `alloc` backport lands Fixes: 392e34b6bc22 ("kbuild: rust: remove the `alloc` crate and `GlobalAlloc`") Reviewed-by: Danilo Krummrich Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250303171030.1081134-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Documentation/rust/quick-start.rst | 2 +- rust/kernel/lib.rs | 2 +- scripts/rustdoc_test_gen.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst index 4aa50e5fcb8c0..6d2607870ba44 100644 --- a/Documentation/rust/quick-start.rst +++ b/Documentation/rust/quick-start.rst @@ -145,7 +145,7 @@ Rust standard library source **************************** The Rust standard library source is required because the build system will -cross-compile ``core`` and ``alloc``. +cross-compile ``core``. If ``rustup`` is being used, run:: diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 398242f92a961..7697c60b2d1a6 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -6,7 +6,7 @@ //! usage by Rust code in the kernel and is shared by all of them. //! //! In other words, all the rest of the Rust code in the kernel (e.g. kernel -//! modules written in Rust) depends on [`core`], [`alloc`] and this crate. +//! modules written in Rust) depends on [`core`] and this crate. //! //! If you need a kernel C API that is not ported or wrapped yet here, then //! do so first instead of bypassing this crate. diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 5ebd42ae4a3fd..76aaa8329413d 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -15,8 +15,8 @@ //! - Test code should be able to define functions and call them, without having to carry //! the context. //! -//! - Later on, we may want to be able to test non-kernel code (e.g. `core`, `alloc` or -//! 
third-party crates) which likely use the standard library `assert*!` macros. +//! - Later on, we may want to be able to test non-kernel code (e.g. `core` or third-party +//! crates) which likely use the standard library `assert*!` macros. //! //! For this reason, instead of the passed context, `kunit_get_current_test()` is used instead //! (i.e. `current->kunit_test`). From df27cef153603b18a7d094b53cc3d5264ff32797 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 5 Mar 2025 13:29:01 +0000 Subject: [PATCH 268/503] rust: init: fix `Zeroable` implementation for `Option>` and `Option>` According to [1], `NonNull` and `#[repr(transparent)]` wrapper types such as our custom `KBox` have the null pointer optimization only if `T: Sized`. Thus remove the `Zeroable` implementation for the unsized case. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Reported-by: Alice Ryhl Closes: https://lore.kernel.org/rust-for-linux/CAH5fLghL+qzrD8KiCF1V3vf2YcC6aWySzkmaE2Zzrnh1gKj-hw@mail.gmail.com/ Cc: stable@vger.kernel.org # v6.12+ (a custom patch will be needed for 6.6.y) Fixes: 38cde0bd7b67 ("rust: init: add `Zeroable` trait and `init::zeroed` function") Signed-off-by: Benno Lossin Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250305132836.2145476-1-benno.lossin@proton.me [ Added Closes tag and moved up the Reported-by one. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 7fd1ea8265a55..8bbd5e3398fcb 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -1418,17 +1418,14 @@ impl_zeroable! { // SAFETY: `T: Zeroable` and `UnsafeCell` is `repr(transparent)`. {} UnsafeCell, - // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). 
+ // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: + // https://doc.rust-lang.org/stable/std/option/index.html#representation). Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, - - // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). - // - // In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant. - {} Option>, - {} Option>, + {} Option>, + {} Option>, // SAFETY: `null` pointer is valid. // From ff64846bee0e7e3e7bc9363ebad3bab42dd27e24 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 13 Feb 2025 06:34:18 -0500 Subject: [PATCH 269/503] rust: alloc: satisfy POSIX alignment requirement ISO C's `aligned_alloc` is partially implementation-defined; on some systems it inherits stricter requirements from POSIX's `posix_memalign`. This causes the call added in commit dd09538fb409 ("rust: alloc: implement `Cmalloc` in module allocator_test") to fail on macOS because it doesn't meet the requirements of `posix_memalign`. Adjust the call to meet the POSIX requirement and add a comment. This fixes failures in `make rusttest` on macOS. Acked-by: Danilo Krummrich Cc: stable@vger.kernel.org Fixes: dd09538fb409 ("rust: alloc: implement `Cmalloc` in module allocator_test") Signed-off-by: Tamir Duberstein Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20250213-aligned-alloc-v7-1-d2a2d0be164b@gmail.com [ Added Cc: stable. 
- Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/allocator_test.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index e3240d16040bd..c37d4c0c64e9f 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -62,6 +62,24 @@ unsafe impl Allocator for Cmalloc { )); } + // ISO C (ISO/IEC 9899:2011) defines `aligned_alloc`: + // + // > The value of alignment shall be a valid alignment supported by the implementation + // [...]. + // + // As an example of the "supported by the implementation" requirement, POSIX.1-2001 (IEEE + // 1003.1-2001) defines `posix_memalign`: + // + // > The value of alignment shall be a power of two multiple of sizeof (void *). + // + // and POSIX-based implementations of `aligned_alloc` inherit this requirement. At the time + // of writing, this is known to be the case on macOS (but not in glibc). + // + // Satisfy the stricter requirement to avoid spurious test failures on some platforms. + let min_align = core::mem::size_of::<*const crate::ffi::c_void>(); + let layout = layout.align_to(min_align).map_err(|_| AllocError)?; + let layout = layout.pad_to_align(); + // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or // exceeds the given size and alignment requirements. let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8; From f2e413f00ebec10a8725b1b75b1b523c561bd403 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:52 +0000 Subject: [PATCH 270/503] rust: docs: add missing newline to printing macro examples Fix adding a newline at the end of the usage of pr_info! 
in the documentation Fixes: e3c3d34507c7 ("docs: rust: Add description of Rust documentation test as KUnit ones") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Reviewed-by: David Gow Link: https://lore.kernel.org/r/20250206-printing_fix-v3-1-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- Documentation/rust/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst index 568b71b415a45..180b886e0f1ee 100644 --- a/Documentation/rust/testing.rst +++ b/Documentation/rust/testing.rst @@ -97,7 +97,7 @@ operator are also supported as usual, e.g.: /// ``` /// # use kernel::{spawn_work_item, workqueue}; - /// spawn_work_item!(workqueue::system(), || pr_info!("x"))?; + /// spawn_work_item!(workqueue::system(), || pr_info!("x\n"))?; /// # Ok::<(), Error>(()) /// ``` From 6f5c36f56d475732981dcf624e0ac0cc7c8984c8 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:53 +0000 Subject: [PATCH 271/503] rust: error: add missing newline to pr_warn! calls Added missing newline at the end of pr_warn! usage so the log is not missed. Fixes: 6551a7fe0acb ("rust: error: Add Error::from_errno{_unchecked}()") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-2-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index f6ecf09cb65f4..a194d83e6835c 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -107,7 +107,7 @@ impl Error { } else { // TODO: Make it a `WARN_ONCE` once available. 
crate::pr_warn!( - "attempted to create `Error` with out of range `errno`: {}", + "attempted to create `Error` with out of range `errno`: {}\n", errno ); code::EINVAL From ccc2f5a436fbb0ae1fb598932a9b8e48423c1959 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 4 Mar 2025 09:50:23 +0100 Subject: [PATCH 272/503] net: dsa: mt7530: Fix traffic flooding for MMIO devices On MMIO devices (e.g. MT7988 or EN7581) unicast traffic received on lanX port is flooded on all other user ports if the DSA switch is configured without VLAN support since PORT_MATRIX in PCR regs contains all user ports. Similar to MDIO devices (e.g. MT7530 and MT7531) fix the issue defining default VLAN-ID 0 for MT7530 MMIO devices. Fixes: 110c18bfed414 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") Signed-off-by: Lorenzo Bianconi Reviewed-by: Chester A. Unal Link: https://patch.msgid.link/20250304-mt7988-flooding-fix-v1-1-905523ae83e9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 1c83af805209c..5883eb93efb11 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2591,7 +2591,8 @@ mt7531_setup_common(struct dsa_switch *ds) if (ret < 0) return ret; - return 0; + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + return mt7530_setup_vlan0(priv); } static int @@ -2687,11 +2688,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - /* Setup VLAN ID 0 for VLAN-unaware bridges */ - ret = mt7530_setup_vlan0(priv); - if (ret) - return ret; - ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; From 0a7565ee6ec31eb16c0476adbfc1af3f2271cb6b Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 13 Feb 2025 19:38:50 -0800 Subject: [PATCH 273/503] Revert "selftests/mm: remove local __NR_* definitions" This reverts commit a5c6bc590094a1a73cf6fa3f505e1945d2bf2461. 
The general approach described in commit e076eaca5906 ("selftests: break the dependency upon local header files") was taken one step too far here: it should not have been extended to include the syscall numbers. This is because doing so would require per-arch support in tools/include/uapi, and no such support exists. This revert fixes two separate reports of test failures, from Dave Hansen[1], and Li Wang[2]. An excerpt of Dave's report: Before this commit (a5c6bc590094a1a73cf6fa3f505e1945d2bf2461) things are fine. But after, I get: running PKEY tests for unsupported CPU/OS An excerpt of Li's report: I just found that mlock2_() return a wrong value in mlock2-test [1] https://lore.kernel.org/dc585017-6740-4cab-a536-b12b37a7582d@intel.com [2] https://lore.kernel.org/CAEemH2eW=UMu9+turT2jRie7+6ewUazXmA6kL+VBo3cGDGU6RA@mail.gmail.com Link: https://lkml.kernel.org/r/20250214033850.235171-1-jhubbard@nvidia.com Fixes: a5c6bc590094 ("selftests/mm: remove local __NR_* definitions") Signed-off-by: John Hubbard Cc: Dave Hansen Cc: Li Wang Cc: David Hildenbrand Cc: Jeff Xu Cc: Andrei Vagin Cc: Axel Rasmussen Cc: Christian Brauner Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. 
Howlett Cc: Muhammad Usama Anjum Cc: Peter Xu Cc: Rich Felker Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/hugepage-mremap.c | 2 +- tools/testing/selftests/mm/ksm_functional_tests.c | 8 +++++++- tools/testing/selftests/mm/memfd_secret.c | 14 +++++++++++++- tools/testing/selftests/mm/mkdirty.c | 8 +++++++- tools/testing/selftests/mm/mlock2.h | 1 - tools/testing/selftests/mm/protection_keys.c | 2 +- tools/testing/selftests/mm/uffd-common.c | 4 ++++ tools/testing/selftests/mm/uffd-stress.c | 15 ++++++++++++++- tools/testing/selftests/mm/uffd-unit-tests.c | 14 +++++++++++++- 9 files changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index ada9156cc497b..c463d1c09c9b4 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include #include -#include +#include #include #include #include /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 66b4e111b5a27..b61803e36d1cf 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -369,6 +369,7 @@ static void test_unmerge_discarded(void) munmap(map, size); } +#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -429,6 +430,7 @@ static void test_unmerge_uffd_wp(void) unmap: munmap(map, size); } +#endif /* Verify that KSM can be enabled / queried with prctl. 
*/ static void test_prctl(void) @@ -684,7 +686,9 @@ int main(int argc, char **argv) exit(test_child_ksm()); } +#ifdef __NR_userfaultfd tests++; +#endif ksft_print_header(); ksft_set_plan(tests); @@ -696,7 +700,9 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); +#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); +#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 74c911aa3aea9..9a0597310a765 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -28,6 +28,8 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) +#ifdef __NR_memfd_secret + #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -332,3 +334,13 @@ int main(int argc, char *argv[]) ksft_finished(); } + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index af2fce496912b..09feeb4536460 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include @@ -265,6 +265,7 @@ static void test_pte_mapped_thp(void) munmap(mmap_mem, mmap_size); } +#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,6 +323,7 @@ static void test_uffdio_copy(void) munmap(dst, pagesize); free(src); } +#endif /* __NR_userfaultfd */ int main(void) { @@ -334,7 +336,9 @@ int main(void) thpsize / 1024); tests += 3; } +#ifdef __NR_userfaultfd tests += 1; +#endif /* __NR_userfaultfd */ 
ksft_print_header(); ksft_set_plan(tests); @@ -364,7 +368,9 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ +#ifdef __NR_userfaultfd test_uffdio_copy(); +#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 1e5731bab499a..4417eaa5cfb78 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,7 +3,6 @@ #include #include #include -#include static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index a4683f2476f27..35565af308af6 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 717539eddf987..7ad6ba660c7d6 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { +#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index a4b83280998ab..944d559ade21f 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,11 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). 
*/ -#include + #include "uffd-common.h" uint64_t features; +#ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -471,3 +472,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9ff71fa1f9bf0..74c8bc02b5063 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,11 +5,12 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ -#include #include "uffd-common.h" #include "../../../../mm/gup_test.h" +#ifdef __NR_userfaultfd + /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1558,3 +1559,14 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ From 349db086a66051bc6114b64b4446787c20ac3f00 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 17 Feb 2025 10:23:04 -0800 Subject: [PATCH 274/503] selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced damos_quota_goal.py selftest checks if DAMOS quota goals tuning feature increases or reduces the effective size quota for given score as expected. The tuning feature sets the minimum quota size as one byte, so if the effective size quota is already one, we cannot expect it to be further reduced. However the test is not aware of the edge case, and fails since it shows no expected change of the effective quota.
Handle the case by updating the failure logic for no change to see if it was the case, and simply skips to next test input. Link: https://lkml.kernel.org/r/20250217182304.45215-1-sj@kernel.org Fixes: f1c07c0a1662 ("selftests/damon: add a test for DAMOS quota goal") Signed-off-by: SeongJae Park Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com Cc: Shuah Khan Cc: [6.10.x] Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damos_quota_goal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7e..f76e0412b564c 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) From 7277dd0a0ba4f8259f7abe37c4b7280fbfc2a182 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Tue, 18 Feb 2025 00:00:17 +0800 Subject: [PATCH 275/503] m68k: sun3: add check for __pgd_alloc() Add check for the return value of __pgd_alloc() in pgd_alloc() to prevent null pointer dereference. 
Link: https://lkml.kernel.org/r/20250217160017.2375536-1-haoxiang_li2024@163.com Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}") Signed-off-by: Haoxiang Li Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Cc: Dave Hansen Cc: Kevin Brodsky Cc: Qi Zheng Cc: Sam Creasey Cc: Signed-off-by: Andrew Morton --- arch/m68k/include/asm/sun3_pgalloc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index f1ae4ed890db5..80afc3a187249 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -44,8 +44,10 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm) pgd_t *new_pgd; new_pgd = __pgd_alloc(mm, 0); - memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); - memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + if (likely(new_pgd != NULL)) { + memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); + memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + } return new_pgd; } From a564ccfe300fa6a065beda06ab7f3c140d6b4d63 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 17 Feb 2025 10:49:24 +0800 Subject: [PATCH 276/503] arm: pgtable: fix NULL pointer dereference issue When update_mmu_cache_range() is called by update_mmu_cache(), the vmf parameter is NULL, which will cause a NULL pointer dereference issue in adjust_pte(): Unable to handle kernel NULL pointer dereference at virtual address 00000030 when read Hardware name: Atmel AT91SAM9 PC is at update_mmu_cache_range+0x1e0/0x278 LR is at pte_offset_map_rw_nolock+0x18/0x2c Call trace: update_mmu_cache_range from remove_migration_pte+0x29c/0x2ec remove_migration_pte from rmap_walk_file+0xcc/0x130 rmap_walk_file from remove_migration_ptes+0x90/0xa4 remove_migration_ptes from migrate_pages_batch+0x6d4/0x858 migrate_pages_batch from migrate_pages+0x188/0x488 migrate_pages from compact_zone+0x56c/0x954 compact_zone from compact_node+0x90/0xf0 compact_node from 
kcompactd+0x1d4/0x204 kcompactd from kthread+0x120/0x12c kthread from ret_from_fork+0x14/0x38 Exception stack(0xc0d8bfb0 to 0xc0d8bff8) To fix it, do not rely on whether 'ptl' is equal to decide whether to hold the pte lock, but decide it by whether CONFIG_SPLIT_PTE_PTLOCKS is enabled. In addition, if two vmas map to the same PTE page, there is no need to hold the pte lock again, otherwise a deadlock will occur. Just add the need_lock parameter to let adjust_pte() know this information. Link: https://lkml.kernel.org/r/20250217024924.57996-1-zhengqi.arch@bytedance.com Fixes: fc9c45b71f43 ("arm: adjust_pte() use pte_offset_map_rw_nolock()") Signed-off-by: Qi Zheng Reported-by: Ezra Buehler Closes: https://lore.kernel.org/lkml/CAM1KZSmZ2T_riHvay+7cKEFxoPgeVpHkVFTzVVEQ1BO0cLkHEQ@mail.gmail.com/ Acked-by: David Hildenbrand Tested-by: Ezra Buehler Cc: Hugh Dickins Cc: Muchun Song Cc: Qi Zheng Cc: Russel King Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- arch/arm/mm/fault-armv.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2bec87c3327d2..39fd5df733178 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, } static int adjust_pte(struct vm_area_struct *vma, unsigned long address, - unsigned long pfn, struct vm_fault *vmf) + unsigned long pfn, bool need_lock) { spinlock_t *ptl; pgd_t *pgd; @@ -99,12 +99,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, if (!pte) return 0; - /* - * If we are using split PTE locks, then we need to take the page - * lock here. Otherwise we are using shared mm->page_table_lock - * which is already locked, thus cannot take it. - */ - if (ptl != vmf->ptl) { + if (need_lock) { + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. 
+ */ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { pte_unmap_unlock(pte, ptl); @@ -114,7 +113,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, ret = do_adjust_pte(vma, address, pfn, pte); - if (ptl != vmf->ptl) + if (need_lock) spin_unlock(ptl); pte_unmap(pte); @@ -123,9 +122,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, static void make_coherent(struct address_space *mapping, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, unsigned long pfn, - struct vm_fault *vmf) + unsigned long addr, pte_t *ptep, unsigned long pfn) { + const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE); + const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; unsigned long offset; @@ -141,6 +141,14 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, */ flush_dcache_mmap_lock(mapping); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { + /* + * If we are using split PTE locks, then we need to take the pte + * lock. Otherwise we are using shared mm->page_table_lock which + * is already locked, thus cannot take it. + */ + bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS); + unsigned long mpnt_addr; + /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA @@ -151,7 +159,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); + mpnt_addr = mpnt->vm_start + offset; + + /* Avoid deadlocks by not grabbing the same PTE lock again. 
*/ + if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr) + need_lock = false; + aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock); } flush_dcache_mmap_unlock(mapping); if (aliases) @@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, __flush_dcache_folio(mapping, folio); if (mapping) { if (cache_is_vivt()) - make_coherent(mapping, vma, addr, ptep, pfn, vmf); + make_coherent(mapping, vma, addr, ptep, pfn); else if (vma->vm_flags & VM_EXEC) __flush_icache_all(); } From b81679b1633aa43c0d973adfa816d78c1ed0d032 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:27 +0800 Subject: [PATCH 277/503] mm: memory-failure: update ttu flag inside unmap_poisoned_folio Patch series "mm: memory_failure: unmap poisoned folio during migrate properly", v3. Fix two bugs during folio migration if the folio is poisoned. This patch (of 3): Commit 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON") introduce TTU_HWPOISON to replace TTU_IGNORE_HWPOISON in order to stop send SIGBUS signal when accessing an error page after a memory error on a clean folio. However during page migration, anon folio must be set with TTU_HWPOISON during unmap_*(). For pagecache we need some policy just like the one in hwpoison_user_mappings to set this flag. So move this policy from hwpoison_user_mappings to unmap_poisoned_folio to handle this warning properly. 
A warning will be produced while unmapping a poisoned folio, with the following log: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 365 at mm/rmap.c:1847 try_to_unmap_one+0x8fc/0xd3c Modules linked in: CPU: 1 UID: 0 PID: 365 Comm: bash Tainted: G W 6.13.0-rc1-00018-gacdb4bbda7ab #42 Tainted: [W]=WARN Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : try_to_unmap_one+0x8fc/0xd3c lr : try_to_unmap_one+0x3dc/0xd3c Call trace: try_to_unmap_one+0x8fc/0xd3c (P) try_to_unmap_one+0x3dc/0xd3c (L) rmap_walk_anon+0xdc/0x1f8 rmap_walk+0x3c/0x58 try_to_unmap+0x88/0x90 unmap_poisoned_folio+0x30/0xa8 do_migrate_range+0x4a0/0x568 offline_pages+0x5a4/0x670 memory_block_action+0x17c/0x374 memory_subsys_offline+0x3c/0x78 device_offline+0xa4/0xd0 state_store+0x8c/0xf0 dev_attr_store+0x18/0x2c sysfs_kf_write+0x44/0x54 kernfs_fop_write_iter+0x118/0x1a8 vfs_write+0x3a8/0x4bc ksys_write+0x6c/0xf8 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x30/0xd0 el0t_64_sync_handler+0xc8/0xcc el0t_64_sync+0x198/0x19c ---[ end trace 0000000000000000 ]--- [mawupeng1@huawei.com: unmap_poisoned_folio(): remove shadowed local `mapping', per Miaohe] Link: https://lkml.kernel.org/r/20250219060653.3849083-1-mawupeng1@huawei.com Link: https://lkml.kernel.org/r/20250217014329.3610326-1-mawupeng1@huawei.com Link: https://lkml.kernel.org/r/20250217014329.3610326-2-mawupeng1@huawei.com Fixes: 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON") Signed-off-by: Ma Wupeng Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Ma Wupeng Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 5 ++-- mm/memory-failure.c | 63 ++++++++++++++++++++++----------------------- mm/memory_hotplug.c | 3 ++- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git
a/mm/internal.h b/mm/internal.h index 109ef30fee11f..20b3535935a31 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1115,7 +1115,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask) * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu); +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); void shake_folio(struct folio *folio); extern int hwpoison_filter(struct page *p); @@ -1138,8 +1138,9 @@ unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma); #else -static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { + return -EBUSY; } #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 995a15eb67e2c..327e02fdc029d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1556,11 +1556,35 @@ static int get_hwpoison_page(struct page *p, unsigned long flags) return ret; } -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { - if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { - struct address_space *mapping; + enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; + struct address_space *mapping; + + if (folio_test_swapcache(folio)) { + pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); + ttu &= ~TTU_HWPOISON; + } + /* + * Propagate the dirty bit from PTEs to struct page first, because we + * need this to decide if we should kill or just drop the page. + * XXX: the dirty test could be racy: set_page_dirty() may not always + * be called inside page lock (it's recommended but not enforced). 
+ */ + mapping = folio_mapping(folio); + if (!must_kill && !folio_test_dirty(folio) && mapping && + mapping_can_writeback(mapping)) { + if (folio_mkclean(folio)) { + folio_set_dirty(folio); + } else { + ttu &= ~TTU_HWPOISON; + pr_info("%#lx: corrupted page was clean: dropped without side effects\n", + pfn); + } + } + + if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { /* * For hugetlb folios in shared mappings, try_to_unmap * could potentially call huge_pmd_unshare. Because of @@ -1572,7 +1596,7 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) if (!mapping) { pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n", folio_pfn(folio)); - return; + return -EBUSY; } try_to_unmap(folio, ttu|TTU_RMAP_LOCKED); @@ -1580,6 +1604,8 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) } else { try_to_unmap(folio, ttu); } + + return folio_mapped(folio) ? -EBUSY : 0; } /* @@ -1589,8 +1615,6 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) static bool hwpoison_user_mappings(struct folio *folio, struct page *p, unsigned long pfn, int flags) { - enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; - struct address_space *mapping; LIST_HEAD(tokill); bool unmap_success; int forcekill; @@ -1613,29 +1637,6 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, if (!folio_mapped(folio)) return true; - if (folio_test_swapcache(folio)) { - pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); - ttu &= ~TTU_HWPOISON; - } - - /* - * Propagate the dirty bit from PTEs to struct page first, because we - * need this to decide if we should kill or just drop the page. - * XXX: the dirty test could be racy: set_page_dirty() may not always - * be called inside page lock (it's recommended but not enforced). 
- */ - mapping = folio_mapping(folio); - if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping && - mapping_can_writeback(mapping)) { - if (folio_mkclean(folio)) { - folio_set_dirty(folio); - } else { - ttu &= ~TTU_HWPOISON; - pr_info("%#lx: corrupted page was clean: dropped without side effects\n", - pfn); - } - } - /* * First collect all the processes that have the page * mapped in dirty form. This has to be done before try_to_unmap, @@ -1643,9 +1644,7 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, */ collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); - unmap_poisoned_folio(folio, ttu); - - unmap_success = !folio_mapped(folio); + unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL); if (!unmap_success) pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n", pfn, folio_mapcount(folio)); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e3655f07dd6e3..e7e47838fd494 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1833,7 +1833,8 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); if (folio_mapped(folio)) - unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK); + unmap_poisoned_folio(folio, pfn, false); + continue; } From 773b9a6aa6d38894b95088e3ed6f8a701d9f50fd Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:28 +0800 Subject: [PATCH 278/503] mm: memory-hotplug: check folio ref count first in do_migrate_range If a folio has an increased reference count, folio_try_get() will acquire it, perform necessary operations, and then release it. In the case of a poisoned folio without an elevated reference count (which is unlikely for memory-failure), folio_try_get() will simply bypass it. Therefore, relocate the folio_try_get() function, responsible for checking and acquiring this reference count at first. 
Link: https://lkml.kernel.org/r/20250217014329.3610326-3-mawupeng1@huawei.com Signed-off-by: Ma Wupeng Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e7e47838fd494..a6abd8d4a09c6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1822,12 +1822,12 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (folio_test_large(folio)) pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; - /* - * HWPoison pages have elevated reference counts so the migration would - * fail on them. It also doesn't make any sense to migrate them in the - * first place. Still try to unmap such a page in case it is still mapped - * (keep the unmap as the catch all safety net). - */ + if (!folio_try_get(folio)) + continue; + + if (unlikely(page_folio(page) != folio)) + goto put_folio; + if (folio_test_hwpoison(folio) || (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { if (WARN_ON(folio_test_lru(folio))) @@ -1835,14 +1835,8 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (folio_mapped(folio)) unmap_poisoned_folio(folio, pfn, false); - continue; - } - - if (!folio_try_get(folio)) - continue; - - if (unlikely(page_folio(page) != folio)) goto put_folio; + } if (!isolate_folio_to_list(folio, &source)) { if (__ratelimit(&migrate_rs)) { From af288a426c3e3552b62595c6138ec6371a17dbba Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:29 +0800 Subject: [PATCH 279/503] hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio Commit b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined") added page poison checks in do_migrate_range in order to make offline hwpoisoned page possible by introducing isolate_lru_page and
try_to_unmap for hwpoisoned page. However folio lock must be held before calling try_to_unmap. Add it to fix this problem. Warning will be produced if folio is not locked during unmap: ------------[ cut here ]------------ kernel BUG at ./include/linux/swapops.h:400! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 4 UID: 0 PID: 411 Comm: bash Tainted: G W 6.13.0-rc1-00016-g3c434c7ee82a-dirty #41 Tainted: [W]=WARN Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : try_to_unmap_one+0xb08/0xd3c lr : try_to_unmap_one+0x3dc/0xd3c Call trace: try_to_unmap_one+0xb08/0xd3c (P) try_to_unmap_one+0x3dc/0xd3c (L) rmap_walk_anon+0xdc/0x1f8 rmap_walk+0x3c/0x58 try_to_unmap+0x88/0x90 unmap_poisoned_folio+0x30/0xa8 do_migrate_range+0x4a0/0x568 offline_pages+0x5a4/0x670 memory_block_action+0x17c/0x374 memory_subsys_offline+0x3c/0x78 device_offline+0xa4/0xd0 state_store+0x8c/0xf0 dev_attr_store+0x18/0x2c sysfs_kf_write+0x44/0x54 kernfs_fop_write_iter+0x118/0x1a8 vfs_write+0x3a8/0x4bc ksys_write+0x6c/0xf8 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x30/0xd0 el0t_64_sync_handler+0xc8/0xcc el0t_64_sync+0x198/0x19c Code: f9407be0 b5fff320 d4210000 17ffff97 (d4210000) ---[ end trace 0000000000000000 ]--- Link: https://lkml.kernel.org/r/20250217014329.3610326-4-mawupeng1@huawei.com Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined") Signed-off-by: Ma Wupeng Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a6abd8d4a09c6..16cf9e17077e3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1832,8 +1832,11 @@ static void 
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); - if (folio_mapped(folio)) + if (folio_mapped(folio)) { + folio_lock(folio); unmap_poisoned_folio(folio, pfn, false); + folio_unlock(folio); + } goto put_folio; } From 04ec365e3fdf136ba5f9053b02fb6c3368a22e83 Mon Sep 17 00:00:00 2001 From: Ujwal Kundur Date: Sat, 15 Feb 2025 16:21:07 +0530 Subject: [PATCH 280/503] Documentation: fix doc link to fault-injection.rst Fix incorrect reference to fault-injection docs Link: https://lkml.kernel.org/r/20250215105106.734-1-ujwal.kundur@gmail.com Signed-off-by: Ujwal Kundur Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06f..35796c290ca35 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2103,7 +2103,7 @@ config FAIL_SKB_REALLOC reallocated, catching possible invalid pointers to the skb. For more information, check - Documentation/dev-tools/fault-injection/fault-injection.rst + Documentation/fault-injection/fault-injection.rst config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" From 19fac3c93991502a22c5132824c40b6a2e64b136 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Feb 2025 10:14:11 +0100 Subject: [PATCH 281/503] dma: kmsan: export kmsan_handle_dma() for modules kmsan_handle_dma() is used by virtio_ring() which can be built as a module. kmsan_handle_dma() needs to be exported otherwise building the virtio_ring fails. Export kmsan_handle_dma for modules. 
Link: https://lkml.kernel.org/r/20250218091411.MMS3wBN9@linutronix.de Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502150634.qjxwSeJR-lkp@intel.com/ Fixes: 7ade4f10779c ("dma: kmsan: unpoison DMA mappings") Signed-off-by: Sebastian Andrzej Siewior Cc: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Macro Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton --- mm/kmsan/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c index 3ea50f09311fd..3df45c25c1f62 100644 --- a/mm/kmsan/hooks.c +++ b/mm/kmsan/hooks.c @@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page, size_t offset, size_t size, size -= to_go; } } +EXPORT_SYMBOL_GPL(kmsan_handle_dma); void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, enum dma_data_direction dir) From c3e998398de48a7528842f05858a3a6bb21002e6 Mon Sep 17 00:00:00 2001 From: gao xu Date: Wed, 19 Feb 2025 01:56:28 +0000 Subject: [PATCH 282/503] mm: fix possible NULL pointer dereference in __swap_duplicate Add a NULL check on the return value of swp_swap_info in __swap_duplicate to prevent crashes caused by NULL pointer dereference. The reason why swp_swap_info() returns NULL is unclear; it may be due to CPU cache issues or DDR bit flips. The probability of this issue is very small - it has been observed to occur approximately 1 in 500,000 times per week. 
The stack info we encountered is as follows: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [RB/E]rb_sreason_str_set: sreason_str set null_pointer Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 39-bit VAs, pgdp=00000008a80e5000 [0000000000000058] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Skip md ftrace buffer dump for: 0x1609e0 ... pc : swap_duplicate+0x44/0x164 lr : copy_page_range+0x508/0x1e78 sp : ffffffc0f2a699e0 x29: ffffffc0f2a699e0 x28: ffffff8a5b28d388 x27: ffffff8b06603388 x26: ffffffdf7291fe70 x25: 0000000000000006 x24: 0000000000100073 x23: 00000000002d2d2f x22: 0000000000000008 x21: 0000000000000000 x20: 00000000002d2d2f x19: 18000000002d2d2f x18: ffffffdf726faec0 x17: 0000000000000000 x16: 0010000000000001 x15: 0040000000000001 x14: 0400000000000001 x13: ff7ffffffffffb7f x12: ffeffffffffffbff x11: ffffff8a5c7e1898 x10: 0000000000000018 x9 : 0000000000000006 x8 : 1800000000000000 x7 : 0000000000000000 x6 : ffffff8057c01f10 x5 : 000000000000a318 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000006daf200000 x1 : 0000000000000001 x0 : 18000000002d2d2f Call trace: swap_duplicate+0x44/0x164 copy_page_range+0x508/0x1e78 copy_process+0x1278/0x21cc kernel_clone+0x90/0x438 __arm64_sys_clone+0x5c/0x8c invoke_syscall+0x58/0x110 do_el0_svc+0x8c/0xe0 el0_svc+0x38/0x9c el0t_64_sync_handler+0x44/0xec el0t_64_sync+0x1a8/0x1ac Code: 9139c35a 71006f3f 54000568 f8797b55 (f9402ea8) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception SMP: stopping secondary CPUs The patch seems to only provide a workaround, but there are no more effective software 
solutions to handle the bit flips problem. This path will change the issue from a system crash to a process exception, thereby reducing the impact on the entire machine. akpm: this is probably a kernel bug, but this patch keeps the system running and doesn't reduce that bug's debuggability. Link: https://lkml.kernel.org/r/e223b0e6ba2f4924984b1917cc717bd5@honor.com Signed-off-by: gao xu Reviewed-by: Barry Song Cc: Suren Baghdasaryan Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swapfile.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index ba19430dd4ead..fab99d67026aa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3542,6 +3542,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) int err, i; si = swp_swap_info(entry); + if (WARN_ON_ONCE(!si)) { + pr_err("%s%08lx\n", Bad_file, entry.val); + return -EINVAL; + } offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001 From: Ge Yang Date: Wed, 19 Feb 2025 11:46:44 +0800 Subject: [PATCH 283/503] mm/hugetlb: wait for hugetlb folios to be freed Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context"), which supports deferring the freeing of hugetlb pages, the allocation of contiguous memory through cma_alloc() may fail probabilistically. In the CMA allocation process, if it is found that the CMA area is occupied by in-use hugetlb folios, these in-use hugetlb folios need to be migrated to another location. When there are no available hugetlb folios in the free hugetlb pool during the migration of in-use hugetlb folios, new folios are allocated from the buddy system. A temporary state is set on the newly allocated folio. Upon completion of the hugetlb folio migration, the temporary state is transferred from the new folios to the old folios. 
Normally, when the old folios with the temporary state are freed, it is directly released back to the buddy system. However, due to the deferred freeing of hugetlb pages, the PageBuddy() check fails, ultimately leading to the failure of cma_alloc(). Here is a simplified call trace illustrating the process: cma_alloc() ->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios ->unmap_and_move_huge_page() ->folio_putback_hugetlb() // Free old folios ->test_pages_isolated() ->__test_page_isolated_in_pageblock() ->PageBuddy(page) // Check if the page is in buddy To resolve this issue, we have implemented a function named wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb folios are properly released back to the buddy system after their migration is completed. By invoking wait_for_freed_hugetlb_folios() before calling PageBuddy(), we ensure that PageBuddy() will succeed. Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.com Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context") Signed-off-by: Ge Yang Reviewed-by: Muchun Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Barry Song <21cnbao@gmail.com> Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +++++ mm/hugetlb.c | 8 ++++++++ mm/page_isolation.c | 10 ++++++++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec8c0ccc8f959..dbe76d4f1bfc8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -682,6 +682,7 @@ struct huge_bootmem_page { int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, @@ 
-1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn, return 0; } +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 163190e89ea16..811b29f77abf8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) return ret; } +void wait_for_freed_hugetlb_folios(void) +{ + if (llist_empty(&hpage_freelist)) + return; + + flush_work(&free_hpage_work); +} + typedef enum { /* * For either 0/1: we checked the per-vma resv map, and one resv diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c608e9d728655..a051a29e95ad0 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone; int ret; + /* + * Due to the deferred freeing of hugetlb folios, the hugepage folios may + * not immediately release to the buddy system. This can cause PageBuddy() + * to fail in __test_page_isolated_in_pageblock(). To ensure that the + * hugetlb folios are properly released back to the buddy system, we + * invoke the wait_for_freed_hugetlb_folios() function to wait for the + * release to complete. + */ + wait_for_freed_hugetlb_folios(); + /* * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free * pages are not aligned to pageblock_nr_pages. From 47b16d0462a460000b8f05dfb1292377ac48f3ca Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 22 Feb 2025 16:19:52 +0000 Subject: [PATCH 284/503] mm: abort vma_modify() on merge out of memory failure The remainder of vma_modify() relies upon the vmg state remaining pristine after a merge attempt. 
Usually this is the case, however in the one edge case scenario of a merge attempt failing not due to the specified range being unmergeable, but rather due to an out of memory error arising when attempting to commit the merge, this assumption becomes untrue. This results in vmg->start, end being modified, and thus the proceeding attempts to split the VMA will be done with invalid start/end values. Thankfully, it is likely practically impossible for us to hit this in reality, as it would require a maple tree node pre-allocation failure that would likely never happen due to it being 'too small to fail', i.e. the kernel would simply keep retrying reclaim until it succeeded. However, this scenario remains theoretically possible, and what we are doing here is wrong so we must correct it. The safest option is, when this scenario occurs, to simply give up the operation. If we cannot allocate memory to merge, then we cannot allocate memory to split either (perhaps moreso!). Any scenario where this would be happening would be under very extreme (likely fatal) memory pressure, so it's best we give up early. So there is no doubt it is appropriate to simply bail out in this scenario. However, in general we must if at all possible never assume VMG state is stable after a merge attempt, since merge operations update VMG fields. As a result, additionally also make this clear by storing start, end in local variables. The issue was reported originally by syzkaller, and by Brad Spengler (via an off-list discussion), and in both instances it manifested as a triggering of the assert: VM_WARN_ON_VMG(start >= end, vmg); In vma_merge_existing_range(). 
It seems at least one scenario in which this is occurring is one in which the merge being attempted is due to an madvise() across multiple VMAs which looks like this: start end |<------>| |----------|------| | vma | next | |----------|------| When madvise_walk_vmas() is invoked, we first find vma in the above (determining prev to be equal to vma as we are offset into vma), and then enter the loop. We determine the end of vma that forms part of the range we are madvise()'ing by setting 'tmp' to this value: /* Here vma->vm_start <= start < (end|vma->vm_end) */ tmp = vma->vm_end; We then invoke the madvise() operation via visit(), letting prev get updated to point to vma as part of the operation: /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = visit(vma, &prev, start, tmp, arg); Where the visit() function pointer in this instance is madvise_vma_behavior(). As observed in syzkaller reports, it is ultimately madvise_update_vma() that is invoked, calling vma_modify_flags_name() and vma_modify() in turn. Then, in vma_modify(), we attempt the merge: merged = vma_merge_existing_range(vmg); if (merged) return merged; We invoke this with vmg->start, end set to start, tmp as such: start tmp |<--->| |----------|------| | vma | next | |----------|------| We find ourselves in the merge right scenario, but the one in which we cannot remove the middle (we are offset into vma). Here we have a special case where vmg->start, end get set to perhaps unintuitive values - we intended to shrink the middle VMA and expand the next. This means vmg->start, end are set to... vma->vm_start, start. Now the commit_merge() fails, and vmg->start, end are left like this. This means we return to the rest of vma_modify() with vmg->start, end (here denoted as start', end') set as: start' end' |<-->| |----------|------| | vma | next | |----------|------| So we now erroneously try to split accordingly. This is where the unfortunate stuff begins. 
We start with: /* Split any preceding portion of the VMA. */ if (vma->vm_start < vmg->start) { ... } This doesn't trigger as we are no longer offset into vma at the start. But then we invoke: /* Split any trailing portion of the VMA. */ if (vma->vm_end > vmg->end) { ... } Which does get invoked. This leaves us with: start' end' |<-->| |----|-----|------| | vma| new | next | |----|-----|------| We then return ultimately to madvise_walk_vmas(). Here 'new' is unknown, and putting back the values known in this function we are faced with: start tmp end | | | |----|-----|------| | vma| new | next | |----|-----|------| prev Then: start = tmp; So: start end | | |----|-----|------| | vma| new | next | |----|-----|------| prev The following code does not cause anything to happen: if (prev && start < prev->vm_end) start = prev->vm_end; if (start >= end) break; And then we invoke: if (prev) vma = find_vma(mm, prev->vm_end); Which is where a problem occurs - we don't know about 'new' so we essentially look for the vma after prev, which is new, whereas we actually intended to discover next! So we end up with: start end | | |----|-----|------| |prev| vma | next | |----|-----|------| And we have successfully bypassed all of the checks madvise_walk_vmas() has to ensure early exit should we end up moving out of range. We loop around, and hit: /* Here vma->vm_start <= start < (end|vma->vm_end) */ tmp = vma->vm_end; Oh dear. Now we have: tmp start end | | |----|-----|------| |prev| vma | next | |----|-----|------| We then invoke: /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = visit(vma, &prev, start, tmp, arg); Where start == tmp. That is, a zero range. This is not good. We invoke visit() which is madvise_vma_behavior() which does not check the range (for good reason, it assumes all checks have been done before it was called), which in turn finally calls madvise_update_vma(). 
The madvise_update_vma() function calls vma_modify_flags_name() in turn, which ultimately invokes vma_modify() with... start == end. vma_modify() calls vma_merge_existing_range() and finally we hit: VM_WARN_ON_VMG(start >= end, vmg); Which triggers, as start == end. While it might be useful to add some CONFIG_DEBUG_VM asserts in these instances to catch this kind of error, since we have just eliminated any possibility of that happening, we will add such asserts separately as to reduce churn and aid backporting. Link: https://lkml.kernel.org/r/20250222161952.41957-1-lorenzo.stoakes@oracle.com Fixes: 2f1c6611b0a8 ("mm: introduce vma_merge_struct and abstract vma_merge(),vma_modify()") Signed-off-by: Lorenzo Stoakes Tested-by: Brad Spengler Reported-by: Brad Spengler Reported-by: syzbot+46423ed8fa1f1148c6e4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/6774c98f.050a0220.25abdd.0991.GAE@google.com/ Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index af1d549b179c9..96bcb372c90e4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1509,24 +1509,28 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) { struct vm_area_struct *vma = vmg->vma; + unsigned long start = vmg->start; + unsigned long end = vmg->end; struct vm_area_struct *merged; /* First, try to merge. */ merged = vma_merge_existing_range(vmg); if (merged) return merged; + if (vmg_nomem(vmg)) + return ERR_PTR(-ENOMEM); /* Split any preceding portion of the VMA. */ - if (vma->vm_start < vmg->start) { - int err = split_vma(vmg->vmi, vma, vmg->start, 1); + if (vma->vm_start < start) { + int err = split_vma(vmg->vmi, vma, start, 1); if (err) return ERR_PTR(err); } /* Split any trailing portion of the VMA. 
*/ - if (vma->vm_end > vmg->end) { - int err = split_vma(vmg->vmi, vma, vmg->end, 0); + if (vma->vm_end > end) { + int err = split_vma(vmg->vmi, vma, end, 0); if (err) return ERR_PTR(err); From 51f271c1940fc9a5f77931ec603b457ea293bd56 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Mon, 24 Feb 2025 19:39:10 +0800 Subject: [PATCH 285/503] mm: swap: add back full cluster when no entry is reclaimed If no swap cache is reclaimed, cluster taken off from full_clusters list will not be put in any list and we can't reclaime HAS_CACHE slots efficiently. Do relocate_cluster for such cluster to avoid inefficiency. Link: https://lkml.kernel.org/r/20250224113910.522439-1-shikemeng@huaweicloud.com Fixes: 3b644773eefd ("mm, swap: reduce contention on device lock") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index fab99d67026aa..a6c41c7ffb037 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -858,6 +858,10 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) offset++; } + /* in case no swap cache is reclaimed */ + if (ci->flags == CLUSTER_FLAG_NONE) + relocate_cluster(si, ci); + unlock_cluster(ci); if (to_scan <= 0) break; From 7a2e7ae5d13658ada38898e5a3a8a40a7910db06 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:46 +0800 Subject: [PATCH 286/503] mm: swap: use correct step in loop to wait all clusters in wait_for_allocation() Use correct step in loop to wait all clusters in wait_for_allocation(). If we miss some cluster in wait_for_allocation(), use after free may occur as follows: shmem_writepage swapoff folio_alloc_swap get_swap_pages scan_swap_map_slots cluster_alloc_swap_entry alloc_swap_scan_cluster cluster_alloc_range /* SWP_WRITEOK is valid */ if (!(si->flags & SWP_WRITEOK)) ... del_from_avail_list(p, true); ... /* miss the cluster in shmem_writepage */ wait_for_allocation() ... 
try_to_unuse() memset(si->swap_map + start, usage, nr_pages); swap_range_alloc(si, nr_pages); ci->count += nr_pages; /* return a valid entry */ ... exit_swap_address_space(p->type); ... ... add_to_swap_cache /* dereference swap_address_space(entry) which is NULL */ xas_lock_irq(&xas); Link: https://lkml.kernel.org/r/20250222160850.505274-3-shikemeng@huaweicloud.com Fixes: 9a0ddeb79880 ("mm, swap: hold a reference during scan and cleanup flag usage") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index a6c41c7ffb037..6460b6cb36c99 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2645,7 +2645,6 @@ static void wait_for_allocation(struct swap_info_struct *si) for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { ci = lock_cluster(si, offset); unlock_cluster(ci); - offset += SWAPFILE_CLUSTER; } } From 57d910cffaa0be981f558ff603e2d896b36b8241 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:47 +0800 Subject: [PATCH 287/503] mm, swap: avoid BUG_ON in relocate_cluster() If allocation is racy with swapoff, we may call free_cluster for cluster already in free list and trigger BUG_ON() as following: Allocation Swapoff cluster_alloc_swap_entry ... /* may get a free cluster with offset */ offset = xxx; if (offset) ci = lock_cluster(si, offset); ... del_from_avail_list(p, true); si->flags &= ~SWP_WRITEOK; alloc_swap_scan_cluster(si, ci, ...) ... /* failed to alloc entry from free entry */ if (!cluster_alloc_range(...)) break; ... /* add back a free cluster */ relocate_cluster(si, ci); if (!ci->count) free_cluster(si, ci); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); To prevent the BUG_ON(), call free_cluster() for free cluster to move the cluster to tail of list. Check cluster is not free before calling free_cluster() in relocate_cluster() to avoid BUG_ON(). 
Link: https://lkml.kernel.org/r/20250222160850.505274-4-shikemeng@huaweicloud.com Fixes: 3b644773eefd ("mm, swap: reduce contention on device lock") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 6460b6cb36c99..df7c4e8b089ca 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -653,7 +653,8 @@ static void relocate_cluster(struct swap_info_struct *si, return; if (!ci->count) { - free_cluster(si, ci); + if (ci->flags != CLUSTER_FLAG_FREE) + free_cluster(si, ci); } else if (ci->count != SWAPFILE_CLUSTER) { if (ci->flags != CLUSTER_FLAG_FRAG) move_cluster(si, ci, &si->frag_clusters[ci->order], From ce6d9c1c2b5cc785016faa11b48b6cd317eb367e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 24 Feb 2025 21:20:02 -0500 Subject: [PATCH 288/503] NFS: fix nfs_release_folio() to not deadlock via kcompactd writeback Add PF_KCOMPACTD flag and current_is_kcompactd() helper to check for it so nfs_release_folio() can skip calling nfs_wb_folio() from kcompactd. Otherwise NFS can deadlock waiting for kcompactd induced writeback which recurses back to NFS (which triggers writeback to NFSD via NFS loopback mount on the same host, NFSD blocks waiting for XFS's call to __filemap_get_folio): 6070.550357] INFO: task kcompactd0:58 blocked for more than 4435 seconds.
{--- [58] "kcompactd0" [<0>] folio_wait_bit+0xe8/0x200 [<0>] folio_wait_writeback+0x2b/0x80 [<0>] nfs_wb_folio+0x80/0x1b0 [nfs] [<0>] nfs_release_folio+0x68/0x130 [nfs] [<0>] split_huge_page_to_list_to_order+0x362/0x840 [<0>] migrate_pages_batch+0x43d/0xb90 [<0>] migrate_pages_sync+0x9a/0x240 [<0>] migrate_pages+0x93c/0x9f0 [<0>] compact_zone+0x8e2/0x1030 [<0>] compact_node+0xdb/0x120 [<0>] kcompactd+0x121/0x2e0 [<0>] kthread+0xcf/0x100 [<0>] ret_from_fork+0x31/0x40 [<0>] ret_from_fork_asm+0x1a/0x30 ---} [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20250225022002.26141-1-snitzer@kernel.org Fixes: 96780ca55e3c ("NFS: fix up nfs_release_folio() to try to release the page") Signed-off-by: Mike Snitzer Cc: Anna Schumaker Cc: Trond Myklebust Cc: Signed-off-by: Andrew Morton --- fs/nfs/file.c | 3 ++- include/linux/compaction.h | 5 +++++ include/linux/sched.h | 2 +- mm/compaction.c | 3 +++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1bb646752e466..033feeab8c346 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) /* If the private flag is set, then the folio is not freeable */ if (folio_test_private(folio)) { if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || - current_is_kswapd()) + current_is_kswapd() || current_is_kcompactd()) return false; if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e947764960496..7bf0c521db634 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order) return 2UL << order; } +static inline int current_is_kcompactd(void) +{ + return current->flags & PF_KCOMPACTD; +} + #ifdef CONFIG_COMPACTION extern unsigned int 
extfrag_for_order(struct zone *zone, unsigned int order); diff --git a/include/linux/sched.h b/include/linux/sched.h index 9632e3318e0d6..9c15365a30c08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1701,7 +1701,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ diff --git a/mm/compaction.c b/mm/compaction.c index 12ed8425fa175..a3203d97123ea 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3181,6 +3181,7 @@ static int kcompactd(void *p) long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC); long timeout = default_timeout; + current->flags |= PF_KCOMPACTD; set_freezable(); pgdat->kcompactd_max_order = 0; @@ -3237,6 +3238,8 @@ static int kcompactd(void *p) pgdat->proactive_compact_trigger = false; } + current->flags &= ~PF_KCOMPACTD; + return 0; } From c29564d8b46f64f5e6e6f1c9c02f7761b7b90963 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 21 Feb 2025 15:16:25 +0800 Subject: [PATCH 289/503] include/linux/log2.h: mark is_power_of_2() with __always_inline When building kernel with randconfig, there is an error: In function `kvm_is_cr4_bit_set',inlined from `kvm_update_cpuid_runtime' at arch/x86/kvm/cpuid.c:310:9: include/linux/compiler_types.h:542:38: error: call to `__compiletime_assert_380' declared with attribute error: BUILD_BUG_ON failed: !is_power_of_2(cr4_bit). 
'!is_power_of_2(X86_CR4_OSXSAVE)' is False, but gcc treats is_power_of_2() as non-inline function and a compilation error happens. Fix this by marking is_power_of_2() with __always_inline. Link: https://lkml.kernel.org/r/20250221071624.1356899-1-suhui@nfschina.com Signed-off-by: Su Hui Cc: Binbin Wu Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Andrew Morton --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index 9f30d087a1281..1366cb688a6d9 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -41,7 +41,7 @@ int __ilog2_u64(u64 n) * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ -static inline __attribute__((const)) +static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); From 1c684d77dfbcf926e0dd28f6d260e8fdd8a58e85 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:31 -0800 Subject: [PATCH 290/503] selftests/damon/damos_quota: make real expectation of quota exceeds Patch series "selftests/damon: three fixes for false results". Fix three DAMON selftest bugs that cause two and one false positive failures and successes. This patch (of 3): damos_quota.py assumes the quota will always be exceeded. But whether quota will be exceeded or not depends on the monitoring results. Actually the monitored workload has changing access pattern and hence sometimes the quota may not really be exceeded. As a result, false positive test failures happen. Expect how much time the quota will be exceeded by checking the monitoring results, and use it instead of the naive assumption.
Link: https://lkml.kernel.org/r/20250225222333.505646-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250225222333.505646-2-sj@kernel.org Fixes: 51f58c9da14b ("selftests/damon: add a test for DAMOS quota") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damos_quota.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd2..57c4937aaed28 100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py @@ -51,16 +51,19 @@ def main(): nr_quota_exceeds = scheme.stats.qt_exceeds wss_collected.sort() + nr_expected_quota_exceeds = 0 for wss in wss_collected: if wss > sz_quota: print('quota is not kept: %s > %s' % (wss, sz_quota)) print('collected samples are as below') print('\n'.join(['%d' % wss for wss in wss_collected])) exit(1) + if wss == sz_quota: + nr_expected_quota_exceeds += 1 - if nr_quota_exceeds < len(wss_collected): - print('quota is not always exceeded: %d > %d' % - (len(wss_collected), nr_quota_exceeds)) + if nr_quota_exceeds < nr_expected_quota_exceeds: + print('quota is exceeded less than expected: %d < %d' % + (nr_quota_exceeds, nr_expected_quota_exceeds)) exit(1) if __name__ == '__main__': From 695469c07a65547acb6e229b3fdf6aaa881817e3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:32 -0800 Subject: [PATCH 291/503] selftests/damon/damon_nr_regions: set ops update for merge results check to 100ms damon_nr_regions.py updates max_nr_regions to a number smaller than expected number of real regions and confirms DAMON respect the harsh limit. To give time for DAMON to make changes for the regions, 3 aggregation intervals (300 milliseconds) are given. The internal mechanism works with not only the max_nr_regions, but also sz_limit, though. 
It avoids merging regions if that can make region of size larger than sz_limit. In the test, sz_limit is set too small to achieve the new max_nr_regions, unless it is updated for the new min_nr_regions. But the update is done only once per operations set update interval, which is one second by default. Hence, the test randomly incurs false positive failures. Fix it by setting the ops interval same to aggregation interval, to make sure sz_limit is updated by the time of the check. Link: https://lkml.kernel.org/r/20250225222333.505646-3-sj@kernel.org Fixes: 8bf890c81612 ("selftests/damon/damon_nr_regions: test online-tuned max_nr_regions") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damon_nr_regions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff5431..6f1c1d88e3091 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -109,6 +109,7 @@ def main(): attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs attrs.min_nr_regions = 3 attrs.max_nr_regions = 7 + attrs.update_us = 100000 err = kdamonds.kdamonds[0].commit() if err is not None: proc.terminate() From 582ccf78f6090d88b1c7066b1e90b3d9ec952d08 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:33 -0800 Subject: [PATCH 292/503] selftests/damon/damon_nr_regions: sort collected regiosn before checking with min/max boundaries damon_nr_regions.py starts DAMON, periodically collects number of regions in snapshots, and checks if it is in the requested range. The check code assumes the numbers are sorted on the collection list, but there is no such guarantee. Hence this can result in false positive test success. Sort the list before doing the check.
Link: https://lkml.kernel.org/r/20250225222333.505646-4-sj@kernel.org Fixes: 781497347d1b ("selftests/damon: implement test for min/max_nr_regions") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damon_nr_regions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 6f1c1d88e3091..58f3291fed12a 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( real_nr_regions, min_nr_regions, max_nr_regions) + collected_nr_regions.sort() if (collected_nr_regions[0] < min_nr_regions or collected_nr_regions[-1] > max_nr_regions): print('fail %s' % test_name) From c50f8e6053b0503375c2975bf47f182445aebb4c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 26 Feb 2025 13:14:00 +1300 Subject: [PATCH 293/503] mm: fix kernel BUG when userfaultfd_move encounters swapcache userfaultfd_move() checks whether the PTE entry is present or a swap entry. - If the PTE entry is present, move_present_pte() handles folio migration by setting: src_folio->index = linear_page_index(dst_vma, dst_addr); - If the PTE entry is a swap entry, move_swap_pte() simply copies the PTE to the new dst_addr. This approach is incorrect because, even if the PTE is a swap entry, it can still reference a folio that remains in the swap cache. This creates a race window between steps 2 and 4. 1. add_to_swap: The folio is added to the swapcache. 2. try_to_unmap: PTEs are converted to swap entries. 3. pageout: The folio is written back. 4. Swapcache is cleared. 
If userfaultfd_move() occurs in the window between steps 2 and 4, after the swap PTE has been moved to the destination, accessing the destination triggers do_swap_page(), which may locate the folio in the swapcache. However, since the folio's index has not been updated to match the destination VMA, do_swap_page() will detect a mismatch. This can result in two critical issues depending on the system configuration. If KSM is disabled, both small and large folios can trigger a BUG during the add_rmap operation due to: page_pgoff(folio, page) != linear_page_index(vma, address) [ 13.336953] page: refcount:6 mapcount:1 mapping:00000000f43db19c index:0xffffaf150 pfn:0x4667c [ 13.337520] head: order:2 mapcount:1 entire_mapcount:0 nr_pages_mapped:1 pincount:0 [ 13.337716] memcg:ffff00000405f000 [ 13.337849] anon flags: 0x3fffc0000020459(locked|uptodate|dirty|owner_priv_1|head|swapbacked|node=0|zone=0|lastcpupid=0xffff) [ 13.338630] raw: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361 [ 13.338831] raw: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000 [ 13.339031] head: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361 [ 13.339204] head: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000 [ 13.339375] head: 03fffc0000000202 fffffdffc0199f01 ffffffff00000000 0000000000000001 [ 13.339546] head: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000 [ 13.339736] page dumped because: VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address)) [ 13.340190] ------------[ cut here ]------------ [ 13.340316] kernel BUG at mm/rmap.c:1380! 
[ 13.340683] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 13.340969] Modules linked in: [ 13.341257] CPU: 1 UID: 0 PID: 107 Comm: a.out Not tainted 6.14.0-rc3-gcf42737e247a-dirty #299 [ 13.341470] Hardware name: linux,dummy-virt (DT) [ 13.341671] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 13.341815] pc : __page_check_anon_rmap+0xa0/0xb0 [ 13.341920] lr : __page_check_anon_rmap+0xa0/0xb0 [ 13.342018] sp : ffff80008752bb20 [ 13.342093] x29: ffff80008752bb20 x28: fffffdffc0199f00 x27: 0000000000000001 [ 13.342404] x26: 0000000000000000 x25: 0000000000000001 x24: 0000000000000001 [ 13.342575] x23: 0000ffffaf0d0000 x22: 0000ffffaf0d0000 x21: fffffdffc0199f00 [ 13.342731] x20: fffffdffc0199f00 x19: ffff000006210700 x18: 00000000ffffffff [ 13.342881] x17: 6c203d2120296567 x16: 6170202c6f696c6f x15: 662866666f67705f [ 13.343033] x14: 6567617028454741 x13: 2929737365726464 x12: ffff800083728ab0 [ 13.343183] x11: ffff800082996bf8 x10: 0000000000000fd7 x9 : ffff80008011bc40 [ 13.343351] x8 : 0000000000017fe8 x7 : 00000000fffff000 x6 : ffff8000829eebf8 [ 13.343498] x5 : c0000000fffff000 x4 : 0000000000000000 x3 : 0000000000000000 [ 13.343645] x2 : 0000000000000000 x1 : ffff0000062db980 x0 : 000000000000005f [ 13.343876] Call trace: [ 13.344045] __page_check_anon_rmap+0xa0/0xb0 (P) [ 13.344234] folio_add_anon_rmap_ptes+0x22c/0x320 [ 13.344333] do_swap_page+0x1060/0x1400 [ 13.344417] __handle_mm_fault+0x61c/0xbc8 [ 13.344504] handle_mm_fault+0xd8/0x2e8 [ 13.344586] do_page_fault+0x20c/0x770 [ 13.344673] do_translation_fault+0xb4/0xf0 [ 13.344759] do_mem_abort+0x48/0xa0 [ 13.344842] el0_da+0x58/0x130 [ 13.344914] el0t_64_sync_handler+0xc4/0x138 [ 13.345002] el0t_64_sync+0x1ac/0x1b0 [ 13.345208] Code: aa1503e0 f000f801 910f6021 97ff5779 (d4210000) [ 13.345504] ---[ end trace 0000000000000000 ]--- [ 13.345715] note: a.out[107] exited with irqs disabled [ 13.345954] note: a.out[107] exited with preempt_count 2 If KSM is enabled, Peter Xu 
also discovered that do_swap_page() may trigger an unexpected CoW operation for small folios because ksm_might_need_to_copy() allocates a new folio when the folio index does not match linear_page_index(vma, addr). This patch also checks the swapcache when handling swap entries. If a match is found in the swapcache, it processes it similarly to a present PTE. However, there are some differences. For example, the folio is no longer exclusive because folio_try_share_anon_rmap_pte() is performed during unmapping. Furthermore, in the case of swapcache, the folio has already been unmapped, eliminating the risk of concurrent rmap walks and removing the need to acquire src_folio's anon_vma or lock. Note that for large folios, in the swapcache handling path, we directly return -EBUSY since split_folio() will return -EBUSY regardless if the folio is under writeback or unmapped. This is not an urgent issue, so a follow-up patch may address it separately. [v-songbaohua@oppo.com: minor cleanup according to Peter Xu] Link: https://lkml.kernel.org/r/20250226024411.47092-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20250226001400.9129-1-21cnbao@gmail.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Barry Song Acked-by: Peter Xu Reviewed-by: Suren Baghdasaryan Cc: Andrea Arcangeli Cc: Al Viro Cc: Axel Rasmussen Cc: Brian Geffon Cc: Christian Brauner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Liam R. 
Howlett Cc: Lokesh Gidra Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport (IBM) Cc: Nicolas Geoffray Cc: Ryan Roberts Cc: Shuah Khan Cc: ZhangPeng Cc: Tangquan Zheng Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 74 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af3dfc3633dbe..c45b672e10d17 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -18,6 +18,7 @@ #include #include #include "internal.h" +#include "swap.h" static __always_inline bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end) @@ -1076,16 +1077,14 @@ static int move_present_pte(struct mm_struct *mm, return err; } -static int move_swap_pte(struct mm_struct *mm, +static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, pte_t *dst_pte, pte_t *src_pte, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, - spinlock_t *dst_ptl, spinlock_t *src_ptl) + spinlock_t *dst_ptl, spinlock_t *src_ptl, + struct folio *src_folio) { - if (!pte_swp_exclusive(orig_src_pte)) - return -EBUSY; - double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1094,6 +1093,16 @@ static int move_swap_pte(struct mm_struct *mm, return -EAGAIN; } + /* + * The src_folio resides in the swapcache, requiring an update to its + * index and mapping to align with the dst_vma, where a swap-in may + * occur and hit the swapcache after moving the PTE. 
+ */ + if (src_folio) { + folio_move_anon_rmap(src_folio, dst_vma); + src_folio->index = linear_page_index(dst_vma, dst_addr); + } + orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); @@ -1141,6 +1150,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, __u64 mode) { swp_entry_t entry; + struct swap_info_struct *si = NULL; pte_t orig_src_pte, orig_dst_pte; pte_t src_folio_pte; spinlock_t *src_ptl, *dst_ptl; @@ -1322,6 +1332,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, dst_ptl, src_ptl, src_folio); } else { + struct folio *folio = NULL; + entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { if (is_migration_entry(entry)) { @@ -1335,9 +1347,53 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } - err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte, - orig_dst_pte, orig_src_pte, dst_pmd, - dst_pmdval, dst_ptl, src_ptl); + if (!pte_swp_exclusive(orig_src_pte)) { + err = -EBUSY; + goto out; + } + + si = get_swap_device(entry); + if (unlikely(!si)) { + err = -EAGAIN; + goto out; + } + /* + * Verify the existence of the swapcache. If present, the folio's + * index and mapping must be updated even when the PTE is a swap + * entry. The anon_vma lock is not taken during this process since + * the folio has already been unmapped, and the swap entry is + * exclusive, preventing rmap walks. + * + * For large folios, return -EBUSY immediately, as split_folio() + * also returns -EBUSY when attempting to split unmapped large + * folios in the swapcache. This issue needs to be resolved + * separately to allow proper handling. 
+ */ + if (!src_folio) + folio = filemap_get_folio(swap_address_space(entry), + swap_cache_index(entry)); + if (!IS_ERR_OR_NULL(folio)) { + if (folio_test_large(folio)) { + err = -EBUSY; + folio_put(folio); + goto out; + } + src_folio = folio; + src_folio_pte = orig_src_pte; + if (!folio_trylock(src_folio)) { + pte_unmap(&orig_src_pte); + pte_unmap(&orig_dst_pte); + src_pte = dst_pte = NULL; + put_swap_device(si); + si = NULL; + /* now we can block and wait */ + folio_lock(src_folio); + goto retry; + } + } + err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, + orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, + dst_ptl, src_ptl, src_folio); } out: @@ -1354,6 +1410,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, if (src_pte) pte_unmap(src_pte); mmu_notifier_invalidate_range_end(&range); + if (si) + put_swap_device(si); return err; } From 058313515d5aab10d0a01dd634f92ed4a4e71d4c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 25 Feb 2025 17:52:55 +0800 Subject: [PATCH 294/503] mm: shmem: fix potential data corruption during shmem swapin Alex and Kairui reported some issues (system hang or data corruption) when swapping out or swapping in large shmem folios. This is especially easy to reproduce when the tmpfs is mount with the 'huge=within_size' parameter. Thanks to Kairui's reproducer, the issue can be easily replicated. The root cause of the problem is that swap readahead may asynchronously swap in order 0 folios into the swap cache, while the shmem mapping can still store large swap entries. Then an order 0 folio is inserted into the shmem mapping without splitting the large swap entry, which overwrites the original large swap entry, leading to data corruption. When getting a folio from the swap cache, we should split the large swap entry stored in the shmem mapping if the orders do not match, to fix this issue. 
Link: https://lkml.kernel.org/r/2fe47c557e74e9df5fe2437ccdc6c9115fa1bf70.1740476943.git.baolin.wang@linux.alibaba.com Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Baolin Wang Reported-by: Alex Xu (Hello71) Reported-by: Kairui Song Closes: https://lore.kernel.org/all/1738717785.im3r5g2vxc.none@localhost/ Tested-by: Kairui Song Cc: David Hildenbrand Cc: Lance Yang Cc: Matthew Wilcow Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 4ea6109a80431..cebbac97a2219 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2253,7 +2253,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio *folio = NULL; bool skip_swapcache = false; swp_entry_t swap; - int error, nr_pages; + int error, nr_pages, order, split_order; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); swap = radix_to_swp_entry(*foliop); @@ -2272,10 +2272,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); + order = xa_get_order(&mapping->i_pages, index); if (!folio) { - int order = xa_get_order(&mapping->i_pages, index); bool fallback_order0 = false; - int split_order; /* Or update major stats only when swapin succeeds?? */ if (fault_type) { @@ -2339,6 +2338,29 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, error = -ENOMEM; goto failed; } + } else if (order != folio_order(folio)) { + /* + * Swap readahead may swap in order 0 folios into swapcache + * asynchronously, while the shmem mapping can still stores + * large swap entries. In such cases, we should split the + * large swap entry to prevent possible data corruption. 
+ */ + split_order = shmem_split_large_entry(inode, index, swap, gfp); + if (split_order < 0) { + error = split_order; + goto failed; + } + + /* + * If the large swap entry has already been split, it is + * necessary to recalculate the new swap entry based on + * the old order alignment. + */ + if (split_order > 0) { + pgoff_t offset = index - round_down(index, 1 << split_order); + + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + } } alloced: @@ -2346,7 +2368,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || folio->swap.val != swap.val || - !shmem_confirm_swap(mapping, index, swap)) { + !shmem_confirm_swap(mapping, index, swap) || + xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { error = -EEXIST; goto unlock; } From ea6de4f8f8f32e54662118a97c441a6ad7b24345 Mon Sep 17 00:00:00 2001 From: Sun YangKai Date: Wed, 26 Feb 2025 23:32:43 +0800 Subject: [PATCH 295/503] mm: zswap: use ATOMIC_LONG_INIT to initialize zswap_stored_pages This is currently the only atomic_long_t variable initialized by ATOMIC_INIT macro found in the kernel by using `grep -r atomic_long_t | grep ATOMIC_INIT` This was introduced in 6e1fa555ec77, in which we modified the type of zswap_stored_pages to atomic_long_t, but didn't change the initialization. 
Link: https://lkml.kernel.org/r/20250226153253.19179-1-sunk67188@gmail.com Fixes: 6e1fa555ec77 ("mm: zswap: modify zswap_stored_pages to be atomic_long_t") Signed-off-by: Sun YangKai Acked-by: Yosry Ahmed Acked-by: David Hildenbrand Cc: Chengming Zhou Cc: Johannes Weiner Cc: Kanchana P Sridhar Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/zswap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index ac9d299e7d0c1..23365e76a3ce3 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -43,7 +43,7 @@ * statistics **********************************/ /* The number of compressed pages currently stored in zswap */ -atomic_long_t zswap_stored_pages = ATOMIC_INIT(0); +atomic_long_t zswap_stored_pages = ATOMIC_LONG_INIT(0); /* * The statistics below are not protected from concurrent access for From 37b338eed10581784e854d4262da05c8d960c748 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 26 Feb 2025 10:55:08 -0800 Subject: [PATCH 296/503] userfaultfd: do not block on locking a large folio with raised refcount Lokesh recently raised an issue about UFFDIO_MOVE getting into a deadlock state when it goes into split_folio() with raised folio refcount. split_folio() expects the reference count to be exactly mapcount + num_pages_in_folio + 1 (see can_split_folio()) and fails with EAGAIN otherwise. If multiple processes are trying to move the same large folio, they raise the refcount (all tasks succeed in that) then one of them succeeds in locking the folio, while others will block in folio_lock() while keeping the refcount raised. The winner of this race will proceed with calling split_folio() and will fail returning EAGAIN to the caller and unlocking the folio. The next competing process will get the folio locked and will go through the same flow. In the meantime the original winner will be retried and will block in folio_lock(), getting into the queue of waiting processes only to repeat the same path. All this results in a livelock. 
An easy fix would be to avoid waiting for the folio lock while holding folio refcount, similar to madvise_free_huge_pmd() where folio lock is acquired before raising the folio refcount. Since we lock and take a refcount of the folio while holding the PTE lock, changing the order of these operations should not break anything. Modify move_pages_pte() to try locking the folio first and if that fails and the folio is large then return EAGAIN without touching the folio refcount. If the folio is single-page then split_folio() is not called, so we don't have this issue. Lokesh has a reproducer [1] and I verified that this change fixes the issue. [1] https://github.com/lokeshgidra/uffd_move_ioctl_deadlock [akpm@linux-foundation.org: reflow comment to 80 cols, s/end/end up/] Link: https://lkml.kernel.org/r/20250226185510.2732648-2-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: Lokesh Gidra Reviewed-by: Peter Xu Acked-by: Liam R. 
Howlett Cc: Andrea Arcangeli Cc: Barry Song <21cnbao@gmail.com> Cc: Barry Song Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index c45b672e10d17..f5c6b3454f76b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1250,6 +1250,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, */ if (!src_folio) { struct folio *folio; + bool locked; /* * Pin the page while holding the lock to be sure the @@ -1269,12 +1270,26 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } + locked = folio_trylock(folio); + /* + * We avoid waiting for folio lock with a raised + * refcount for large folios because extra refcounts + * will result in split_folio() failing later and + * retrying. If multiple tasks are trying to move a + * large folio we can end up livelocking. + */ + if (!locked && folio_test_large(folio)) { + spin_unlock(src_ptl); + err = -EAGAIN; + goto out; + } + folio_get(folio); src_folio = folio; src_folio_pte = orig_src_pte; spin_unlock(src_ptl); - if (!folio_trylock(src_folio)) { + if (!locked) { pte_unmap(&orig_src_pte); pte_unmap(&orig_dst_pte); src_pte = dst_pte = NULL; From 927e926d72d9155fde3264459fe9bfd7b5e40d28 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 26 Feb 2025 10:55:09 -0800 Subject: [PATCH 297/503] userfaultfd: fix PTE unmapping stack-allocated PTE copies Current implementation of move_pages_pte() copies source and destination PTEs in order to detect concurrent changes to PTEs involved in the move. However these copies are also used to unmap the PTEs, which will fail if CONFIG_HIGHPTE is enabled because the copies are allocated on the stack. Fix this by using the actual PTEs which were kmap()ed. 
Link: https://lkml.kernel.org/r/20250226185510.2732648-3-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: Peter Xu Reviewed-by: Peter Xu Cc: Andrea Arcangeli Cc: Barry Song <21cnbao@gmail.com> Cc: Barry Song Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index f5c6b3454f76b..d06453fa8abae 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1290,8 +1290,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, spin_unlock(src_ptl); if (!locked) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ folio_lock(src_folio); @@ -1307,8 +1307,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, /* at this point we have src_folio locked */ if (folio_test_large(src_folio)) { /* split_folio() can block */ - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; err = split_folio(src_folio); if (err) @@ -1333,8 +1333,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } if (!anon_vma_trylock_write(src_anon_vma)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ anon_vma_lock_write(src_anon_vma); @@ -1352,8 +1352,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { if (is_migration_entry(entry)) { - pte_unmap(&orig_src_pte); - 
pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; migration_entry_wait(mm, src_pmd, src_addr); err = -EAGAIN; @@ -1396,8 +1396,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, src_folio = folio; src_folio_pte = orig_src_pte; if (!folio_trylock(src_folio)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; put_swap_device(si); si = NULL; From adae46ac1e38a288b14f0298e27412adcba83f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Wed, 26 Feb 2025 13:26:27 +0100 Subject: [PATCH 298/503] mm: shmem: remove unnecessary warning in shmem_writepage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the scenario where shmem_writepage() is called with info->flags & VM_LOCKED is unlikely to happen, it's still possible, as evidenced by syzbot [1]. However, the warning in this case isn't necessary because the situation is already handled correctly [2]. 
[2] https://lore.kernel.org/lkml/8afe1f7f-31a2-4fc0-1fbd-f9ba8a116fe3@google.com/ Link: https://lkml.kernel.org/r/20250226-20250221-warning-in-shmem_writepage-v1-1-5ad19420e17e@igalia.com Fixes: 9a976f0c847b ("shmem: skip page split if we're not reclaiming") Signed-off-by: Ricardo Cañuelo Navarro Reported-by: Pengfei Xu Closes: https://lore.kernel.org/lkml/ZZ9PShXjKJkVelNm@xpf.sh.intel.com/ [1] Suggested-by: Hugh Dickins Reviewed-by: Baolin Wang Cc: Florent Revest Cc: Christian Brauner Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: Florent Revest Cc: Luis Chamberalin Signed-off-by: Andrew Morton --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index cebbac97a2219..3fcd1690eedd7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1548,7 +1548,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) if (WARN_ON_ONCE(!wbc->for_reclaim)) goto redirty; - if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) + if ((info->flags & VM_LOCKED) || sbinfo->noswap) goto redirty; if (!total_swap_pages) From 3685024edd270f7c791f993157d65d3c928f3d6e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 26 Feb 2025 12:16:09 +0000 Subject: [PATCH 299/503] mm: don't skip arch_sync_kernel_mappings() in error paths Fix callers that previously skipped calling arch_sync_kernel_mappings() if an error occurred during a pgtable update. The call is still required to sync any pgtable updates that may have occurred prior to hitting the error condition. These are theoretical bugs discovered during code review. 
Link: https://lkml.kernel.org/r/20250226121610.2401743-1-ryan.roberts@arm.com Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified") Fixes: 0c95cba49255 ("mm: apply_to_pte_range warn and fail if a large pte is encountered") Signed-off-by: Ryan Roberts Reviewed-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Christop Hellwig Cc: "Uladzislau Rezki (Sony)" Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 6 ++++-- mm/vmalloc.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b4d3d4893267c..55d0d49546273 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3051,8 +3051,10 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) - return -EINVAL; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) { + err = -EINVAL; + break; + } if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a6e7acebe9adf..61981ee1c9d2f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -586,13 +586,13 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) - return err; + break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); - return 0; + return err; } /* From 34b82f33cf3f03bc39e9a205a913d790e1520ade Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Wed, 26 Feb 2025 11:23:41 -0500 Subject: [PATCH 300/503] mm: fix finish_fault() handling for large folios When handling faults for anon shmem finish_fault() will attempt to install ptes for the entire folio. Unfortunately if it encounters a single non-pte_none entry in that range it will bail, even if the pte that triggered the fault is still pte_none. 
When this situation happens the fault will be retried endlessly never making forward progress. This patch fixes this behavior and if it detects that a pte in the range is not pte_none it will fall back to setting a single pte. [bgeffon@google.com: tweak whitespace] Link: https://lkml.kernel.org/r/20250227133236.1296853-1-bgeffon@google.com Link: https://lkml.kernel.org/r/20250226162341.915535-1-bgeffon@google.com Fixes: 43e027e41423 ("mm: memory: extend finish_fault() to support large folio") Signed-off-by: Brian Geffon Suggested-by: Baolin Wang Reported-by: Marek Maslanka Cc: Hugh Dickins Cc: David Hildenbrand Cc: Hugh Dickens Cc: Kefeng Wang Cc: Matthew Wilcow (Oracle) Cc: Suren Baghdasaryan Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 55d0d49546273..b9661ccfa64fd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5185,7 +5185,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf) bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED); int type, nr_pages; - unsigned long addr = vmf->address; + unsigned long addr; + bool needs_fallback = false; + +fallback: + addr = vmf->address; /* Did we COW the page? */ if (is_cow) @@ -5224,7 +5228,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf) * approach also applies to non-anonymous-shmem faults to avoid * inflating the RSS of the process. 
*/ - if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) { + if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) || + unlikely(needs_fallback)) { nr_pages = 1; } else if (nr_pages > 1) { pgoff_t idx = folio_page_idx(folio, page); @@ -5260,9 +5265,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; goto unlock; } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { - update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); - ret = VM_FAULT_NOPAGE; - goto unlock; + needs_fallback = true; + pte_unmap_unlock(vmf->pte, vmf->ptl); + goto fallback; } folio_ref_add(folio, nr_pages - 1); From eae116d1f0449ade3269ca47a67432622f5c6438 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 25 Feb 2025 22:22:58 -0500 Subject: [PATCH 301/503] Revert "mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone" Commit 96a5c186efff ("mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone") removes the protection of lower zones from allocations targeting memory-less high zones. This had an unintended impact on the pattern of reclaims because it makes the high-zone-targeted allocation more likely to succeed in lower zones, which adds pressure to said zones. I.e, the following corresponding checks in zone_watermark_ok/zone_watermark_fast are less likely to trigger: if (free_pages <= min + z->lowmem_reserve[highest_zoneidx]) return false; As a result, we are observing an increase in reclaim and kswapd scans, due to the increased pressure. This was initially observed as increased latency in filesystem operations when benchmarking with fio on a machine with some memory-less zones, but it has since been associated with increased contention in locks related to memory reclaim. By reverting this patch, the original performance was recovered on that machine. 
The original commit was introduced as a clarification of the /proc/zoneinfo output, so it doesn't seem there are usecases depending on it, making the revert a simple solution. For reference, I collected vmstat with and without this patch on a freshly booted system running intensive randread io from an nvme for 5 minutes. I got: rpm-6.12.0-slfo.1.2 -> pgscan_kswapd 5629543865 Patched -> pgscan_kswapd 33580844 33M scans is similar to what we had in kernels predating this patch. These numbers are fairly representative of the workload on this machine, as measured in several runs. So we are talking about an increase of two orders of magnitude. Link: https://lkml.kernel.org/r/20250226032258.234099-1-krisman@suse.de Fixes: 96a5c186efff ("mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Mel Gorman Cc: Baoquan He Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 579789600a3c7..fe986e6de7a01 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5849,11 +5849,10 @@ static void setup_per_zone_lowmem_reserve(void) for (j = i + 1; j < MAX_NR_ZONES; j++) { struct zone *upper_zone = &pgdat->node_zones[j]; - bool empty = !zone_managed_pages(upper_zone); managed_pages += zone_managed_pages(upper_zone); - if (clear || empty) + if (clear) zone->lowmem_reserve[j] = 0; else zone->lowmem_reserve[j] = managed_pages / ratio; From 88f5a9a945bf25df1f90032baf95457370c2206e Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 27 Feb 2025 17:02:28 +0530 Subject: [PATCH 302/503] MAINTAINERS: .mailmap: update Sumit Garg's email address Update Sumit Garg's email address to @kernel.org.
Link: https://lkml.kernel.org/r/20250227113228.1809449-1-sumit.garg@linaro.org Signed-off-by: Sumit Garg Cc: Herbert Xu Cc: Jarkko Sakkinen Cc: Jens Wiklander Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index a897c16d3baef..4a93909286d8b 100644 --- a/.mailmap +++ b/.mailmap @@ -689,6 +689,7 @@ Subbaraman Narayanamurthy Subhash Jadavani Sudarshan Rajagopalan Sudeep Holla Sudeep KarkadaNagesha +Sumit Garg Sumit Semwal Surabhi Vishnoi Sven Eckelmann diff --git a/MAINTAINERS b/MAINTAINERS index 4e17764cb6ed4..5e1be7b25912e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12864,7 +12864,7 @@ F: include/keys/trusted_dcp.h F: security/keys/trusted-keys/trusted_dcp.c KEYS-TRUSTED-TEE -M: Sumit Garg +M: Sumit Garg L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -17658,7 +17658,7 @@ F: Documentation/ABI/testing/sysfs-bus-optee-devices F: drivers/tee/optee/ OP-TEE RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Sumit Garg +M: Sumit Garg L: op-tee@lists.trustedfirmware.org S: Maintained F: drivers/char/hw_random/optee-rng.c @@ -23268,7 +23268,7 @@ F: include/media/i2c/tw9910.h TEE SUBSYSTEM M: Jens Wiklander -R: Sumit Garg +R: Sumit Garg L: op-tee@lists.trustedfirmware.org S: Maintained F: Documentation/ABI/testing/sysfs-class-tee From b2ef51c74b0171fde7eb69b6152d3d2f743ef269 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 27 Feb 2025 15:34:09 +0800 Subject: [PATCH 303/503] rapidio: fix an API misues when rio_add_net() fails rio_add_net() calls device_register() and fails when device_register() fails. Thus, put_device() should be used rather than kfree(). Add "mport->net = NULL;" to avoid a use after free issue. 
Link: https://lkml.kernel.org/r/20250227073409.3696854-1-haoxiang_li2024@163.com Fixes: e8de370188d0 ("rapidio: add mport char device driver") Signed-off-by: Haoxiang Li Reviewed-by: Dan Carpenter Cc: Alexandre Bounine Cc: Matt Porter Cc: Yang Yingliang Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 27afbb9d544b7..cbf531d0ba688 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1742,7 +1742,8 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, err = rio_add_net(net); if (err) { rmcd_debug(RDEV, "failed to register net, err=%d", err); - kfree(net); + put_device(&net->dev); + mport->net = NULL; goto cleanup; } } From e842f9a1edf306bf36fe2a4d847a0b0d458770de Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 27 Feb 2025 12:11:31 +0800 Subject: [PATCH 304/503] rapidio: add check for rio_add_net() in rio_scan_alloc_net() The return value of rio_add_net() should be checked. If it fails, put_device() should be called to free the memory and give up the reference initialized in rio_add_net(). 
Link: https://lkml.kernel.org/r/20250227041131.3680761-1-haoxiang_li2024@163.com Fixes: e6b585ca6e81 ("rapidio: move net allocation into core code") Signed-off-by: Yang Yingliang Signed-off-by: Haoxiang Li Cc: Alexandre Bounine Cc: Matt Porter Cc: Dan Carpenter Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio-scan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index fdcf742b2adbc..c12941f71e2cb 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -871,7 +871,10 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, dev_set_name(&net->dev, "rnet_%d", net->id); net->dev.parent = &mport->dev; net->dev.release = rio_scan_release_dev; - rio_add_net(net); + if (rio_add_net(net)) { + put_device(&net->dev); + net = NULL; + } } return net; From 8fe9ed44dc29fba0786b7e956d2e87179e407582 Mon Sep 17 00:00:00 2001 From: Hao Zhang Date: Thu, 27 Feb 2025 11:41:29 +0800 Subject: [PATCH 305/503] mm/page_alloc: fix uninitialized variable The variable "compact_result" is not initialized in function __alloc_pages_slowpath(). It causes should_compact_retry() to use an uninitialized value. Initialize variable "compact_result" with the value COMPACT_SKIPPED. 
BUG: KMSAN: uninit-value in __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 alloc_pages_mpol+0x4cd/0x890 mm/mempolicy.c:2270 alloc_frozen_pages_noprof mm/mempolicy.c:2341 [inline] alloc_pages_noprof mm/mempolicy.c:2361 [inline] folio_alloc_noprof+0x1dc/0x350 mm/mempolicy.c:2371 filemap_alloc_folio_noprof+0xa6/0x440 mm/filemap.c:1019 __filemap_get_folio+0xb9a/0x1840 mm/filemap.c:1970 grow_dev_folio fs/buffer.c:1039 [inline] grow_buffers fs/buffer.c:1105 [inline] __getblk_slow fs/buffer.c:1131 [inline] bdev_getblk+0x2c9/0xab0 fs/buffer.c:1431 getblk_unmovable include/linux/buffer_head.h:369 [inline] ext4_getblk+0x3b7/0xe50 fs/ext4/inode.c:864 ext4_bread_batch+0x9f/0x7d0 fs/ext4/inode.c:933 __ext4_find_entry+0x1ebb/0x36c0 fs/ext4/namei.c:1627 ext4_lookup_entry fs/ext4/namei.c:1729 [inline] ext4_lookup+0x189/0xb40 fs/ext4/namei.c:1797 __lookup_slow+0x538/0x710 fs/namei.c:1793 lookup_slow+0x6a/0xd0 fs/namei.c:1810 walk_component fs/namei.c:2114 [inline] link_path_walk+0xf29/0x1420 fs/namei.c:2479 path_openat+0x30f/0x6250 fs/namei.c:3985 do_filp_open+0x268/0x600 fs/namei.c:4016 do_sys_openat2+0x1bf/0x2f0 fs/open.c:1428 do_sys_open fs/open.c:1443 [inline] __do_sys_openat fs/open.c:1459 [inline] __se_sys_openat fs/open.c:1454 [inline] __x64_sys_openat+0x2a1/0x310 fs/open.c:1454 x64_sys_call+0x36f5/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:258 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable compact_result created at: __alloc_pages_slowpath+0x66/0x16c0 mm/page_alloc.c:4218 __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 Link: https://lkml.kernel.org/r/tencent_ED1032321D6510B145CDBA8CBA0093178E09@qq.com Reported-by: syzbot+0cfd5e38e96a5596f2b6@syzkaller.appspotmail.com Closes: 
https://syzkaller.appspot.com/bug?extid=0cfd5e38e96a5596f2b6 Signed-off-by: Hao Zhang Reviewed-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fe986e6de7a01..94917c729120f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4243,6 +4243,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, restart: compaction_retries = 0; no_progress_loops = 0; + compact_result = COMPACT_SKIPPED; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); zonelist_iter_cookie = zonelist_iter_begin(); From 80da96d735094ea22985ced98bc57fe3a4422921 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Mar 2025 14:41:57 +0100 Subject: [PATCH 306/503] drm/bochs: Fix DPMS regression The recent rewrite with the use of regular atomic helpers broke the DPMS unblanking on X11. Fix it by moving the call of bochs_hw_blank(false) from CRTC mode_set_nofb() to atomic_enable(). 
Fixes: 2037174993c8 ("drm/bochs: Use regular atomic helpers") Link: https://bugzilla.suse.com/show_bug.cgi?id=1238209 Signed-off-by: Takashi Iwai Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20250304134203.20534-1-tiwai@suse.de --- drivers/gpu/drm/tiny/bochs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index c67e1f9067859..8706763af8fba 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -335,8 +335,6 @@ static void bochs_hw_setmode(struct bochs_device *bochs, struct drm_display_mode bochs->xres, bochs->yres, bochs->bpp, bochs->yres_virtual); - bochs_hw_blank(bochs, false); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, 0); bochs_dispi_write(bochs, VBE_DISPI_INDEX_BPP, bochs->bpp); bochs_dispi_write(bochs, VBE_DISPI_INDEX_XRES, bochs->xres); @@ -506,6 +504,9 @@ static int bochs_crtc_helper_atomic_check(struct drm_crtc *crtc, static void bochs_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct bochs_device *bochs = to_bochs_device(crtc->dev); + + bochs_hw_blank(bochs, false); } static void bochs_crtc_helper_atomic_disable(struct drm_crtc *crtc, From d385c8bceb14665e935419334aa3d3fac2f10456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Wed, 5 Mar 2025 15:58:49 +0100 Subject: [PATCH 307/503] pid: Do not set pid_max in new pid namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is already difficult for users to troubleshoot which of multiple pid limits restricts their workload. The per-(hierarchical-)NS pid_max would contribute to the confusion. 
Also, the implementation copies the limit upon creation from parent, this pattern showed cumbersome with some attributes in legacy cgroup controllers -- it's subject to race condition between parent's limit modification and children creation and once copied it must be changed in the descendant. Let's do what other places do (ucounts or cgroup limits) -- create new pid namespaces without any limit at all. The global limit (actually any ancestor's limit) is still effectively in place, we avoid the set/unshare race and bumps of global (ancestral) limit have the desired effect on pid namespace that do not care. Link: https://lore.kernel.org/r/20240408145819.8787-1-mkoutny@suse.com/ Link: https://lore.kernel.org/r/20250221170249.890014-1-mkoutny@suse.com/ Fixes: 7863dcc72d0f4 ("pid: allow pid_max to be set per pid namespace") Signed-off-by: Michal Koutný Link: https://lore.kernel.org/r/20250305145849.55491-1-mkoutny@suse.com Signed-off-by: Christian Brauner --- kernel/pid_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 8f6cfec87555a..7098ed44e717d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -107,7 +107,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns goto out_free_idr; ns->ns.ops = &pidns_operations; - ns->pid_max = parent_pid_ns->pid_max; + ns->pid_max = PID_MAX_LIMIT; err = register_pidns_sysctls(ns); if (err) goto out_free_inum; From 14672f059d83f591afb2ee1fff56858efe055e5a Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Thu, 6 Mar 2025 10:59:53 +0530 Subject: [PATCH 308/503] sched/deadline: Use online cpus for validating runtime The ftrace selftest reported a failure because writing -1 to sched_rt_runtime_us returns -EBUSY. This happens when the possible CPUs are different from active CPUs. Active CPUs are part of one root domain, while remaining CPUs are part of def_root_domain. 
Since the active cpumask is being used, this results in cpus=0 when a non-active CPU is used in the loop. Fix it by looping over the online CPUs instead for validating the bandwidth calculations. Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Reviewed-by: Juri Lelli Link: https://lore.kernel.org/r/20250306052954.452005-2-sshegde@linux.ibm.com --- kernel/sched/deadline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 38e4537790af7..ff4df16b5186d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3189,7 +3189,7 @@ int sched_dl_global_validate(void) * value smaller than the currently allocated bandwidth in * any of the root_domains. */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { rcu_read_lock_sched(); if (dl_bw_visited(cpu, gen)) From b1536481c81fb604074da799e4f2d2038a1663f7 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Thu, 6 Mar 2025 10:59:54 +0530 Subject: [PATCH 309/503] sched/rt: Update limit of sched_rt sysctl in documentation By default the fair_server dl_server allocates 5% of the bandwidth to the root domain. Because of this, writing any value less than 5% fails with -EBUSY: $ cat /proc/sys/kernel/sched_rt_period_us 1000000 $ echo 49999 > /proc/sys/kernel/sched_rt_runtime_us -bash: echo: write error: Device or resource busy $ echo 50000 > /proc/sys/kernel/sched_rt_runtime_us $ Since the sched_rt_runtime_us allows -1 as the minimum, put this restriction in the documentation. One should check the average of runtime/period in /sys/kernel/debug/sched/fair_server/cpuX/* for the exact value.
Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Reviewed-by: Juri Lelli Link: https://lore.kernel.org/r/20250306052954.452005-3-sshegde@linux.ibm.com --- Documentation/scheduler/sched-rt-group.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst index 80b05a3009ea2..ab464335d3204 100644 --- a/Documentation/scheduler/sched-rt-group.rst +++ b/Documentation/scheduler/sched-rt-group.rst @@ -102,6 +102,9 @@ The system wide settings are configured under the /proc virtual file system: * sched_rt_period_us takes values from 1 to INT_MAX. * sched_rt_runtime_us takes values from -1 to sched_rt_period_us. * A run time of -1 specifies runtime == period, ie. no limit. + * sched_rt_runtime_us/sched_rt_period_us > 0.05 inorder to preserve + bandwidth for fair dl_server. For accurate value check average of + runtime/period in /sys/kernel/debug/sched/fair_server/cpuX/ 2.2 Default behaviour From cf7ee25e70c6edfac4553d6b671e8b19db1d9573 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 4 Mar 2025 13:59:51 +0800 Subject: [PATCH 310/503] mctp i3c: handle NULL header address daddr can be NULL if there is no neighbour table entry present, in that case the tx packet should be dropped. saddr will usually be set by MCTP core, but check for NULL in case a packet is transmitted by a different protocol. 
Signed-off-by: Matt Johnston Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver") Link: https://patch.msgid.link/20250304-mctp-i3c-null-v1-1-4416bbd56540@codeconstruct.com.au Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-i3c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index d247fe483c588..c1e72253063b5 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -507,6 +507,9 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, { struct mctp_i3c_internal_hdr *ihdr; + if (!daddr || !saddr) + return -EINVAL; + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); skb_reset_mac_header(skb); ihdr = (void *)skb_mac_header(skb); From 0e7633d7b95b67f1758aea19f8e85621c5f506a3 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 4 Mar 2025 19:10:39 +0100 Subject: [PATCH 311/503] net: ipv6: fix dst ref loop in ila lwtunnel This patch follows commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels") and, on a second thought, the same patch is also needed for ila (even though the config that triggered the issue was pathological, but still, we don't want that to happen). 
Fixes: 79ff2fc31e0f ("ila: Cache a route to translated address") Cc: Tom Herbert Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250304181039.35951-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ila/ila_lwt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ff7e734e335b0..ac4bcc623603a 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -88,7 +88,8 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - if (ilwt->connected) { + /* cache only if we don't create a dst reference loop */ + if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); local_bh_enable(); From 5da15a9c11c1c47ef573e6805b60a7d8a1687a2a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 5 Mar 2025 09:16:55 +0100 Subject: [PATCH 312/503] net: ipv6: fix missing dst ref drop in ila lwtunnel Add missing skb_dst_drop() to drop reference to the old dst before adding the new dst to the skb. 
Fixes: 79ff2fc31e0f ("ila: Cache a route to translated address") Cc: Tom Herbert Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250305081655.19032-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ila/ila_lwt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ac4bcc623603a..7d574f5132e2f 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -96,6 +96,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) } } + skb_dst_drop(skb); skb_dst_set(skb, dst); return dst_output(net, sk, skb); From 9a665fe3d967fe46edb4fd2497c7a5cc2dac2f55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Mar 2025 11:44:41 +0100 Subject: [PATCH 313/503] USB: serial: option: match on interface class for Telit FN990B The device id entries for Telit FN990B ended up matching only on the interface protocol. While this works, the protocol is qualified by the interface class (and subclass) which should have been included. Switch to matching using USB_DEVICE_AND_INTERFACE_INFO() while keeping the entries sorted also by protocol for consistency. 
Link: https://lore.kernel.org/20250227110655.3647028-2-fabio.porcedda@gmail.com/ Cc: Fabio Porcedda Cc: Daniele Palmas Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 58bd54e8c483a..1ea2870725aca 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1394,22 +1394,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x60) }, /* Telit FN990B (rmnet) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x30), /* Telit FN990B (rmnet) */ .driver_info = NCTRL(5) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x60) }, /* Telit FN990B (MBIM) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x30), /* Telit FN990B (MBIM) */ .driver_info = NCTRL(6) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x60) }, /* Telit FN990B (RNDIS) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x60) }, + { 
USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x30), /* Telit FN990B (RNDIS) */ .driver_info = NCTRL(6) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x60) }, /* Telit FN990B (ECM) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x30), /* Telit FN990B (ECM) */ .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x60) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 4981bb50392b7515b765da28cf8768ce624c2670 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 4 Mar 2025 10:19:38 +0100 Subject: [PATCH 314/503] USB: serial: option: add Telit Cinterion FE990B compositions Add the following Telit Cinterion FE990B40 compositions: 0x10b0: rmnet + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b1: MBIM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b1 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b2: RNDIS + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b2 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b3: ECM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 11 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b3 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Reviewed-by: Daniele Palmas [ johan: use USB_DEVICE_AND_INTERFACE_INFO() and sort by protocol ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1ea2870725aca..aeb71d96a8287 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1388,6 +1388,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(3) | RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x30), /* Telit FE990B (rmnet) */ + .driver_info = NCTRL(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x30), /* Telit FE990B (MBIM) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x30), /* Telit FE990B (RNDIS) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x30), /* Telit FE990B (ECM) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 
0x60) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c0, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c4, 0xff), /* Telit FE910C04 (rmnet) */ From 6232f0d8e100a26275bbd773fc56a60af2c95322 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 4 Mar 2025 10:19:39 +0100 Subject: [PATCH 315/503] USB: serial: option: fix Telit Cinterion FE990A name The correct name for FE990 is FE990A so use it in order to avoid confusion with FE990B. Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index aeb71d96a8287..5cd26dac2069f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1368,13 +1368,13 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */ .driver_info = RSVD(0) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990A (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990A (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990A (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990A (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { 
USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, From 9af152dcf1a06f589f44a74da4ad67e365d4db9a Mon Sep 17 00:00:00 2001 From: Ivan Abramov Date: Thu, 6 Mar 2025 14:20:45 +0300 Subject: [PATCH 316/503] drm/gma500: Add NULL check for pci_gfx_root in mid_get_vbt_data() Since pci_get_domain_bus_and_slot() can return NULL, add NULL check for pci_gfx_root in the mid_get_vbt_data(). This change is similar to the checks implemented in mid_get_fuse_settings() and mid_get_pci_revID(), which were introduced by commit 0cecdd818cd7 ("gma500: Final enables for Oaktrail") as "additional minor bulletproofing". Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: f910b411053f ("gma500: Add the glue to the various BIOS and firmware interfaces") Signed-off-by: Ivan Abramov Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20250306112046.17144-1-i.abramov@mt-integration.ru --- drivers/gpu/drm/gma500/mid_bios.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/gma500/mid_bios.c b/drivers/gpu/drm/gma500/mid_bios.c index 7e76790c6a81f..cba97d7db131d 100644 --- a/drivers/gpu/drm/gma500/mid_bios.c +++ b/drivers/gpu/drm/gma500/mid_bios.c @@ -279,6 +279,11 @@ static void mid_get_vbt_data(struct drm_psb_private *dev_priv) 0, PCI_DEVFN(2, 0)); int ret = -1; + if (pci_gfx_root == NULL) { + WARN_ON(1); + return; + } + /* Get the address of the platform config vbt */ pci_read_config_dword(pci_gfx_root, 0xFC, &addr); pci_dev_put(pci_gfx_root); From b5e3956535466187657563b754ba0f1da8626c7f Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 6 Mar 2025 14:39:51 +0800 Subject: [PATCH 317/503] kbuild: install-extmod-build: Fix build when specifying KBUILD_OUTPUT Since commit 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package"), the linux-headers pacman package fails to build when "O=" is set. 
The build system complains: /mnt/chroot/linux/scripts/Makefile.build:41: mnt/chroots/linux-mainline/pacman/linux-upstream/pkg/linux-upstream-headers/usr//lib/modules/6.14.0-rc3-00350-g771dba31fffc/build/scripts/Makefile: No such file or directory This is because the "srcroot" variable is set to "." and the "build" variable is set to the absolute path. This makes the "src" variables point to wrong directory. Change the "build" variable to a relative path to "." to fix build. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Signed-off-by: Inochi Amaoto Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 2966473b46609..b96538787f3d9 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-base=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi From fb8286562ecfb585e26b033c5e32e6fb85efb0b3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2025 04:05:26 +0100 Subject: [PATCH 318/503] netfilter: nf_tables: make destruction work queue pernet The call to flush_work before tearing down a table from the netlink notifier was supposed to make sure that all earlier updates (e.g. rule add) that might reference that table have been processed. Unfortunately, flush_work() waits for the last queued instance. 
This could be an instance that is different from the one that we must wait for. This is because transactions are protected with a pernet mutex, but the work item is global, so holding the transaction mutex doesn't prevent another netns from queueing more work. Make the work item pernet so that flush_work() will wait for all transactions queued from this netns. A welcome side effect is that we no longer need to wait for transaction objects from foreign netns. The gc work queue is still global. This seems to be ok because nft_set structures are reference counted and each container structure owns a reference on the net namespace. The destroy_list is still protected by a global spinlock rather than pernet one but the hold time is very short anyway. v2: call cancel_work_sync before reaping the remaining tables (Pablo). Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier") Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- net/netfilter/nf_tables_api.c | 24 ++++++++++++++---------- net/netfilter/nft_compat.c | 8 ++++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 60d5dcdb289c9..803d5f1601f9d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); -void nf_tables_trans_destroy_flush_work(void); +void nf_tables_trans_destroy_flush_work(struct net *net); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); @@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) 
{ re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head destroy_list; struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; @@ -1915,6 +1916,7 @@ struct nftables_pernet { unsigned int base_seq; unsigned int gc_seq; u8 validate_state; + struct work_struct destroy_work; }; extern unsigned int nf_tables_net_id; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a34de9c17cf10..c2df81b7e9505 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); -static LIST_HEAD(nf_tables_destroy_list); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_trans_destroy_work(struct work_struct *w) { + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); - list_splice_init(&nf_tables_destroy_list, &head); + list_splice_init(&nft_net->destroy_list, &head); spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } -void nf_tables_trans_destroy_flush_work(void) +void 
nf_tables_trans_destroy_flush_work(struct net *net) { - flush_work(&trans_destroy_work); + struct nftables_pernet *nft_net = nft_pernet(net); + + flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net) trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - schedule_work(&trans_destroy_work); + schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net) nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } @@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); + cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); 
WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) @@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); - nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); - cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7ca4f0d21fe2a..72711d62fddfa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } -static void nft_compat_wait_for_destructors(void) +static void nft_compat_wait_for_destructors(struct net *net) { /* xtables matches or targets can have side effects, e.g. * creation/destruction of /proc files. @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void) * work queue. If we have pending invocations we thus * need to wait for those to finish. 
*/ - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); } static int @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); return xt_check_match(&par, size, proto, inv); } From e26f1cfeac6712516bfeed80890da664f4f2e88a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 6 Mar 2025 13:32:54 +0000 Subject: [PATCH 319/503] ASoC: cs42l43: Fix maximum ADC Volume The range of ADC volume is -1 -> 3 (-6 to 18dB) so the number of levels should actually be 4. Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250306133254.1861046-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 4257dbefe9dd1..d307b56a7f38e 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -1146,7 +1146,7 @@ static const struct snd_kcontrol_new cs42l43_controls[] = { SOC_DOUBLE_R_SX_TLV("ADC Volume", CS42L43_ADC_B_CTRL1, CS42L43_ADC_B_CTRL2, CS42L43_ADC_PGA_GAIN_SHIFT, - 0xF, 5, cs42l43_adc_tlv), + 0xF, 4, cs42l43_adc_tlv), SOC_DOUBLE("PDM1 Invert Switch", CS42L43_DMIC_PDM_CTRL, CS42L43_PDM1L_INV_SHIFT, CS42L43_PDM1R_INV_SHIFT, 1, 0), From 35d99c68af40a8ca175babc5a89ef7e2226fb3ca Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Mon, 3 Mar 2025 10:42:33 +0800 Subject: [PATCH 320/503] btrfs: fix a leaked chunk map issue in 
read_one_chunk() Add btrfs_free_chunk_map() to free the memory allocated by btrfs_alloc_chunk_map() if btrfs_add_chunk_map() fails. Fixes: 7dc66abb5a47 ("btrfs: use a dedicated data structure for chunk maps") CC: stable@vger.kernel.org Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Haoxiang Li Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f6ae76815e4b5..6f8dcf59b5257 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7151,6 +7151,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", map->start, map->chunk_len, ret); + btrfs_free_chunk_map(map); } return ret; From 391b41f983bf7ff853de44704d8e14e7cc648a9b Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 5 Mar 2025 16:37:50 +0000 Subject: [PATCH 321/503] gpio: rcar: Fix missing of_node_put() call of_parse_phandle_with_fixed_args() requires its caller to call into of_node_put() on the node pointer from the output structure, but such a call is currently missing. Call into of_node_put() to rectify that. Fixes: 159f8a0209af ("gpio-rcar: Add DT support") Signed-off-by: Fabrizio Castro Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250305163753.34913-2-fabrizio.castro.jz@renesas.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rcar.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 8e0544e924886..a7a1cdf7ac66d 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -468,7 +468,12 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins) p->info = *info; ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args); - *npins = ret == 0 ? 
args.args[2] : RCAR_MAX_GPIO_PER_BANK; + if (ret) { + *npins = RCAR_MAX_GPIO_PER_BANK; + } else { + *npins = args.args[2]; + of_node_put(args.np); + } if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) { dev_warn(p->dev, "Invalid number of gpio lines %u, using %u\n", From ff712188daa3fe3ce7e11e530b4dca3826dae14a Mon Sep 17 00:00:00 2001 From: Miao Li Date: Tue, 4 Mar 2025 15:07:57 +0800 Subject: [PATCH 322/503] usb: quirks: Add DELAY_INIT and NO_LPM for Prolific Mass Storage Card Reader When used on Huawei hisi platforms, Prolific Mass Storage Card Reader which the VID:PID is in 067b:2731 might fail to enumerate at boot time and doesn't work well with LPM enabled, combination quirks: USB_QUIRK_DELAY_INIT + USB_QUIRK_NO_LPM fixed the problems. Signed-off-by: Miao Li Cc: stable Link: https://lore.kernel.org/r/20250304070757.139473-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index dfcfc142bd5e1..8efbacc5bc341 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -341,6 +341,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, + /* Prolific Single-LUN Mass Storage Card Reader */ + { USB_DEVICE(0x067b, 0x2731), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_NO_LPM }, + /* Saitek Cyborg Gold Joystick */ { USB_DEVICE(0x06a3, 0x0006), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, From b13abcb7ddd8d38de769486db5bd917537b32ab1 Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Wed, 5 Mar 2025 11:17:39 +0000 Subject: [PATCH 323/503] usb: typec: ucsi: Fix NULL pointer access Resources should be released only after all threads that utilize them have been destroyed. This commit ensures that resources are not released prematurely by waiting for the associated workqueue to complete before deallocating them. 
Cc: stable Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250305111739.1489003-2-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 7a56d3f840d75..2a2915b0a645f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1825,11 +1825,11 @@ static int ucsi_init(struct ucsi *ucsi) err_unregister: for (con = connector; con->port; con++) { + if (con->wq) + destroy_workqueue(con->wq); ucsi_unregister_partner(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); ucsi_unregister_port_psy(con); - if (con->wq) - destroy_workqueue(con->wq); usb_power_delivery_unregister_capabilities(con->port_sink_caps); con->port_sink_caps = NULL; @@ -2013,10 +2013,6 @@ void ucsi_unregister(struct ucsi *ucsi) for (i = 0; i < ucsi->cap.num_connectors; i++) { cancel_work_sync(&ucsi->connector[i].work); - ucsi_unregister_partner(&ucsi->connector[i]); - ucsi_unregister_altmodes(&ucsi->connector[i], - UCSI_RECIPIENT_CON); - ucsi_unregister_port_psy(&ucsi->connector[i]); if (ucsi->connector[i].wq) { struct ucsi_work *uwork; @@ -2032,6 +2028,11 @@ void ucsi_unregister(struct ucsi *ucsi) destroy_workqueue(ucsi->connector[i].wq); } + ucsi_unregister_partner(&ucsi->connector[i]); + ucsi_unregister_altmodes(&ucsi->connector[i], + UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(&ucsi->connector[i]); + usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_sink_caps); ucsi->connector[i].port_sink_caps = NULL; usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_source_caps); From 74d42bdb3a4673b1c10d1f457184e4d3c9cb0196 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:30:42 -1000 Subject: 
[PATCH 324/503] fs/pipe: express 'pipe_empty()' in terms of 'pipe_occupancy()' That's what 'pipe_full()' does, so it's more consistent. But more importantly it gets the type limits right when the pipe head and tail are no longer necessarily 'unsigned int'. Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e572e6fc4f81f..4d0a2267e6efc 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -177,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) } /** - * pipe_empty - Return true if the pipe is empty + * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline bool pipe_empty(unsigned int head, unsigned int tail) +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head == tail; + return (pipe_index_t)(head - tail); } /** - * pipe_occupancy - Return number of slots used in the pipe + * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +static inline bool pipe_empty(unsigned int head, unsigned int tail) { - return (pipe_index_t)(head - tail); + return !pipe_occupancy(head, tail); } /** From d810d4c27bf34c719243bab9feb0d843edc09fd7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:33:58 -1000 Subject: [PATCH 325/503] fs/pipe: do not open-code pipe head/tail logic in FIONREAD Rasmus points out that we do indeed have other cases of breakage from the type changes that were introduced on 32-bit targets in order to read the pipe head and tail values atomically (commit 3d252160b818: "fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex"). 
Fix it up by using the proper helper functions that now deal with the pipe buffer index types properly. This makes the code simpler and more obvious. The compiler does the CSE and loop hoisting of the pipe ring size masking that we used to do manually, so open-coding this was never a good idea. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/87cyeu5zgk.fsf@prevas.dk/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Cc: Oleg Nesterov Cc: Mateusz Guzik Cc: K Prateek Nayak Cc: Swapnil Sapkal Signed-off-by: Linus Torvalds --- fs/pipe.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index e8e6698f36981..5c872775a6db9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -614,7 +614,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; - unsigned int count, head, tail, mask; + unsigned int count, head, tail; switch (cmd) { case FIONREAD: @@ -622,10 +622,9 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) count = 0; head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - while (tail != head) { - count += pipe->bufs[tail & mask].len; + while (!pipe_empty(head, tail)) { + count += pipe_buf(pipe, tail)->len; tail++; } mutex_unlock(&pipe->mutex); From ebb0f38bb47f74b29e267babdbcd2c47d5292aa8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:53:25 -1000 Subject: [PATCH 326/503] fs/pipe: fix pipe buffer index use in FUSE This was another case that Rasmus pointed out where the direct access to the pipe head and tail pointers broke on 32-bit configurations due to the type changes. As with the pipe FIONREAD case, fix it by using the appropriate helper functions that deal with the right pipe index sizing.
Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/878qpi5wz4.fsf@prevas.dk/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Cc: Oleg Nesterov Cc: Mateusz Guzik Cc: K Prateek Nayak Cc: Swapnil Sapkal Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2b2d1b7555444..3c9caafca9e29 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2107,7 +2107,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - unsigned int head, tail, mask, count; + unsigned int head, tail, count; unsigned nbuf; unsigned idx; struct pipe_buffer *bufs; @@ -2124,8 +2124,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - count = head - tail; + count = pipe_occupancy(head, tail); bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) { @@ -2135,8 +2134,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, nbuf = 0; rem = 0; - for (idx = tail; idx != head && rem < len; idx++) - rem += pipe->bufs[idx & mask].len; + for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++) + rem += pipe_buf(pipe, idx)->len; ret = -EINVAL; if (rem < len) @@ -2147,10 +2146,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct pipe_buffer *ibuf; struct pipe_buffer *obuf; - if (WARN_ON(nbuf >= count || tail == head)) + if (WARN_ON(nbuf >= count || pipe_empty(head, tail))) goto out_free; - ibuf = &pipe->bufs[tail & mask]; + ibuf = pipe_buf(pipe, tail); obuf = &bufs[nbuf]; if (rem >= ibuf->len) { From 6933c1067fe6df8ddb34dd68bdb2aa172cbd08c8 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:54 +0000 Subject: [PATCH 327/503] rust: init: add missing newline to pr_info! calls Several pr_info!
calls in rust/kernel/init.rs (both in code examples and macro documentation) were missing a newline, causing logs to run together. This commit updates these calls to include a trailing newline, improving readability and consistency with the C side. Fixes: 6841d45a3030 ("rust: init: add `stack_pin_init!` macro") Fixes: 7f8977a7fe6d ("rust: init: add `{pin_}chain` functions to `{Pin}Init`") Fixes: d0fdc3961270 ("rust: init: add `PinnedDrop` trait and macros") Fixes: 4af84c6a85c6 ("rust: init: update expanded macro explanation") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-3-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. Added one more Fixes tag (still same set of stable kernels). - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 12 ++++++------ rust/kernel/init/macros.rs | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 8bbd5e3398fcb..e25d047f3c827 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -259,7 +259,7 @@ pub mod macros; /// }, /// })); /// let foo: Pin<&mut Foo> = foo; -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// ``` /// /// # Syntax @@ -319,7 +319,7 @@ macro_rules! stack_pin_init { /// }, GFP_KERNEL)?, /// })); /// let foo = foo.unwrap(); -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// ``` /// /// ```rust,ignore @@ -352,7 +352,7 @@ macro_rules! 
stack_pin_init { /// x: 64, /// }, GFP_KERNEL)?, /// })); -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// # Ok::<_, AllocError>(()) /// ``` /// @@ -882,7 +882,7 @@ pub unsafe trait PinInit: Sized { /// /// impl Foo { /// fn setup(self: Pin<&mut Self>) { - /// pr_info!("Setting up foo"); + /// pr_info!("Setting up foo\n"); /// } /// } /// @@ -986,7 +986,7 @@ pub unsafe trait Init: PinInit { /// /// impl Foo { /// fn setup(&mut self) { - /// pr_info!("Setting up foo"); + /// pr_info!("Setting up foo\n"); /// } /// } /// @@ -1336,7 +1336,7 @@ impl InPlaceWrite for UniqueArc> { /// #[pinned_drop] /// impl PinnedDrop for Foo { /// fn drop(self: Pin<&mut Self>) { -/// pr_info!("Foo is being dropped!"); +/// pr_info!("Foo is being dropped!\n"); /// } /// } /// ``` diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 1fd146a832416..b7213962a6a5a 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -45,7 +45,7 @@ //! #[pinned_drop] //! impl PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! @@ -412,7 +412,7 @@ //! #[pinned_drop] //! impl PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! ``` @@ -423,7 +423,7 @@ //! // `unsafe`, full path and the token parameter are added, everything else stays the same. //! unsafe impl ::kernel::init::PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! 
``` From 50c3e77eb3712a039760345999709ee0fad83447 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:55 +0000 Subject: [PATCH 328/503] rust: sync: add missing newline in locked_by log example The pr_info! example in rust/kernel/sync/locked_by.rs was missing a newline. This patch appends the missing newline to ensure that log messages for locked resources display correctly. Fixes: 7b1f55e3a984 ("rust: sync: introduce `LockedBy`") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-4-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/sync/locked_by.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs index a7b244675c2b9..61f100a45b350 100644 --- a/rust/kernel/sync/locked_by.rs +++ b/rust/kernel/sync/locked_by.rs @@ -55,7 +55,7 @@ use core::{cell::UnsafeCell, mem::size_of, ptr}; /// fn print_bytes_used(dir: &Directory, file: &File) { /// let guard = dir.inner.lock(); /// let inner_file = file.inner.access(&guard); -/// pr_info!("{} {}", guard.bytes_used, inner_file.bytes_used); +/// pr_info!("{} {}\n", guard.bytes_used, inner_file.bytes_used); /// } /// /// /// Increments `bytes_used` for both the directory and file. From 0ea4c3906416cefd6ae7ae5e93af9f2ef1b8c39b Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:56 +0000 Subject: [PATCH 329/503] rust: workqueue: add missing newline to pr_info! examples The documentation examples in rust/kernel/workqueue.rs use pr_info! calls that lack a trailing newline. To maintain consistency with kernel logging practices, this patch adds the newline to all affected examples. 
Fixes: 15b286d1fd05 ("rust: workqueue: add examples") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250206-printing_fix-v3-5-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/workqueue.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb4..b7be224cdf4bb 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -60,7 +60,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The value is: {}", this.value); +//! pr_info!("The value is: {}\n", this.value); //! } //! } //! @@ -108,7 +108,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The value is: {}", this.value_1); +//! pr_info!("The value is: {}\n", this.value_1); //! } //! } //! @@ -116,7 +116,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The second value is: {}", this.value_2); +//! pr_info!("The second value is: {}\n", this.value_2); //! } //! } //! From c00b413a96261faef4ce22329153c6abd4acef25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 6 Mar 2025 16:59:16 +0100 Subject: [PATCH 330/503] x86/boot: Sanitize boot params before parsing command line The 5-level paging code parses the command line to look for the 'no5lvl' string, and does so very early, before sanitize_boot_params() has been called and has been given the opportunity to wipe bogus data from the fields in boot_params that are not covered by struct setup_header, and are therefore supposed to be initialized to zero by the bootloader. 
This triggers an early boot crash when using syslinux-efi to boot a recent kernel built with CONFIG_X86_5LEVEL=y and CONFIG_EFI_STUB=n, as the 0xff padding that now fills the unused PE/COFF header is copied into boot_params by the bootloader, and interpreted as the top half of the command line pointer. Fix this by sanitizing the boot_params before use. Note that there is no harm in calling this more than once; subsequent invocations are able to spot that the boot_params have already been cleaned up. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: # v6.1+ Link: https://lore.kernel.org/r/20250306155915.342465-2-ardb+git@google.com Closes: https://lore.kernel.org/all/202503041549.35913.ulrich.gemkow@ikr.uni-stuttgart.de --- arch/x86/boot/compressed/pgtable_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c882e1f67af01..d8c5de40669d3 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include +#include #include #include #include "pgtable.h" @@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + sanitize_boot_params(bp); boot_params_ptr = bp; /* From 33255c161ac4be003ac87c434ebc49645d18a929 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 20:07:58 -0500 Subject: [PATCH 331/503] bcachefs: Fix bch2_dev_journal_alloc() spuriously failing Previously, we fixed journal resize spuriously failing with -BCH_ERR_open_buckets_empty, but initial journal allocation was missed because it didn't invoke the "block on allocator" loop at all. Factor out the "loop on allocator" code to fix that.
Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 59 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5dabbf3c0965c..05b1250619ecc 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1021,8 +1021,8 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, /* allocate journal on a device: */ -static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bool new_fs, struct closure *cl) +static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, + bool new_fs, struct closure *cl) { struct bch_fs *c = ca->fs; struct journal_device *ja = &ca->journal; @@ -1150,26 +1150,20 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, return ret; } -/* - * Allocate more journal space at runtime - not currently making use if it, but - * the code works: - */ -int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, - unsigned nr) +static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca, + unsigned nr, bool new_fs) { struct journal_device *ja = &ca->journal; - struct closure cl; int ret = 0; + struct closure cl; closure_init_stack(&cl); - down_write(&c->state_lock); - /* don't handle reducing nr of buckets yet: */ if (nr < ja->nr) - goto unlock; + return 0; - while (ja->nr < nr) { + while (!ret && ja->nr < nr) { struct disk_reservation disk_res = { 0, 0, 0 }; /* @@ -1182,27 +1176,38 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, * filesystem-wide allocation will succeed, this is a device * specific allocation - we can hang here: */ + if (!new_fs) { + ret = bch2_disk_reservation_get(c, &disk_res, + bucket_to_sector(ca, nr - ja->nr), 1, 0); + if (ret) + break; + } - ret = bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 1, 0); - if (ret) - break; + ret = 
bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); - ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); + if (ret == -BCH_ERR_bucket_alloc_blocked || + ret == -BCH_ERR_open_buckets_empty) + ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); - closure_sync(&cl); - - if (ret && - ret != -BCH_ERR_bucket_alloc_blocked && - ret != -BCH_ERR_open_buckets_empty) - break; } - bch_err_fn(c, ret); -unlock: + return ret; +} + +/* + * Allocate more journal space at runtime - not currently making use if it, but + * the code works: + */ +int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, + unsigned nr) +{ + down_write(&c->state_lock); + int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); up_write(&c->state_lock); + + bch_err_fn(c, ret); return ret; } @@ -1228,7 +1233,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); + ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs); err: bch_err_fn(ca, ret); return ret; From 8ba73f53dc5b7545776e09e6982115dcbcbabec4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 11:34:41 -0500 Subject: [PATCH 332/503] bcachefs: copygc now skips non-rw devices There's no point in doing copygc on non-rw devices: the fragmentation doesn't matter if we're not writing to them, and we may not have anywhere to put the data on our other devices. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 21805509ab9e6..6718dc37c5a35 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -74,20 +74,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_alloc_v4 _a; - const struct bch_alloc_v4 *a; - int ret; - if (bch2_bucket_is_open(trans->c, - b->k.bucket.inode, - b->k.bucket.offset)) + if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset)) return 0; - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, - b->k.bucket, BTREE_ITER_cached); - ret = bkey_err(k); + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, + b->k.bucket, BTREE_ITER_cached); + int ret = bkey_err(k); if (ret) return ret; @@ -95,13 +89,18 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (!ca) goto out; - a = bch2_alloc_to_v4(k, &_a); + if (ca->mi.state != BCH_MEMBER_STATE_rw || + !bch2_dev_is_online(ca)) + goto out_put; + + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); ret = lru_idx && lru_idx <= time; - +out_put: bch2_dev_put(ca); out: bch2_trans_iter_exit(trans, &iter); From 115ef44a98220fddfab37a39a19370497cd718b9 Mon Sep 17 00:00:00 2001 From: Jun Yang Date: Wed, 5 Mar 2025 23:44:10 +0800 Subject: [PATCH 333/503] sched: address a potential NULL pointer dereference in the GRED scheduler. If kzalloc in gred_init returns a NULL pointer, the code follows the error handling path, invoking gred_destroy. This, in turn, calls gred_offload, where memset could receive a NULL pointer as input, potentially leading to a kernel crash. 
When table->opt is NULL in gred_init(), gred_change_table_def() is not called yet, so it is not necessary to call ->ndo_setup_tc() in gred_offload(). Signed-off-by: Jun Yang Reviewed-by: Cong Wang Fixes: f25c0515c521 ("net: sched: gred: dynamically allocate tc_gred_qopt_offload") Link: https://patch.msgid.link/20250305154410.3505642-1-juny24602@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_gred.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index ab6234b4fcd54..532fde548b88f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); - gred_offload(sch, TC_GRED_DESTROY); + if (table->opt) + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } From e7112524e5e885181cc5ae4d258f33b9dbe0b907 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 08:27:51 -0800 Subject: [PATCH 334/503] block: Name the RQF flags enum Commit 5f89154e8e9e3445f9b59 ("block: Use enum to define RQF_x bit indexes") converted the RQF flags to an anonymous enum, which was a beneficial change. This patch goes one step further by naming the enum as "rqf_flags". This naming enables exporting these flags to BPF clients, eliminating the need to duplicate these flags in BPF code. 
Instead, BPF clients can now access the same kernel-side values through CO-RE (Compile Once, Run Everywhere), as shown in this example: rqf_stats = bpf_core_enum_value(enum rqf_flags, __RQF_STATS) Suggested-by: Yonghong Song Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20250306-rqf_flags-v1-1-bbd64918b406@debian.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fa2a76cc2f73d..71f4f0cc3dac6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ From 00a7d39898c8010bfd5ff62af31ca5db34421b38 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 18:25:35 -1000 Subject: [PATCH 335/503] fs/pipe: add simpler helpers for common cases The fix to atomically read the pipe head and tail state when not holding the pipe mutex has caused a number of headaches due to the size change of the involved types. It turns out that we don't have _that_ many places that access these fields directly and were affected, but we have more than we strictly should have, because our low-level helper functions have been designed to have intimate knowledge of how the pipes work. And as a result, that random noise of direct 'pipe->head' and 'pipe->tail' accesses makes it harder to pinpoint any actual potential problem spots remaining. For example, we didn't have a "is the pipe full" helper function, but instead had a "given these pipe buffer indexes and this pipe size, is the pipe full". That's because some low-level pipe code does actually want that much more complicated interface.
But most other places literally just want a "is the pipe full" helper, and not having it meant that those places ended up being unnecessarily much too aware of this all. It would have been much better if only the very core pipe code that cared had been the one aware of this all. So let's fix it - better late than never. This just introduces the trivial wrappers for "is this pipe full or empty" and to get how many pipe buffers are used, so that instead of writing if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) the places that literally just want to know if a pipe is full can just say if (pipe_is_full(pipe)) instead. The existing trivial cases were converted with a 'sed' script. This cuts down on the places that access pipe->head and pipe->tail directly outside of the pipe code (and core splice code) quite a lot. The splice code in particular still revels in doing the direct low-level accesses, and the fuse fuse_dev_splice_write() code also seems a bit unnecessarily eager to go very low-level, but it's at least a bit better than it used to be. 
Signed-off-by: Linus Torvalds --- drivers/char/virtio_console.c | 4 ++-- fs/fuse/dev.c | 2 +- fs/pipe.c | 6 +++--- fs/splice.c | 20 ++++++++++---------- include/linux/pipe_fs_i.h | 27 +++++++++++++++++++++++++++ mm/filemap.c | 7 +++---- mm/shmem.c | 6 +++--- 7 files changed, 49 insertions(+), 23 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 24442485e73e7..18f92dd44d456 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -923,14 +923,14 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); ret = 0; - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) goto error_out; ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) goto error_out; - occupancy = pipe_occupancy(pipe->head, pipe->tail); + occupancy = pipe_buf_usage(pipe); buf = alloc_buf(port->portdev->vdev, 0, occupancy); if (!buf) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3c9caafca9e29..2c3a4d09e500f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1457,7 +1457,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (ret < 0) goto out; - if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) { + if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) { ret = -EIO; goto out; } diff --git a/fs/pipe.c b/fs/pipe.c index 5c872775a6db9..4d0799e4e7196 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -394,7 +394,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = true; mutex_lock(&pipe->mutex); } - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) wake_next_reader = false; mutex_unlock(&pipe->mutex); @@ -577,11 +577,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); mutex_lock(&pipe->mutex); - was_empty = pipe_empty(pipe->head, pipe->tail); + was_empty = 
pipe_is_empty(pipe); wake_next_writer = true; } out: - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) wake_next_writer = false; mutex_unlock(&pipe->mutex); diff --git a/fs/splice.c b/fs/splice.c index 28cfa63aa2364..23fa5561b9441 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -331,7 +331,7 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, int i; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); npages = DIV_ROUND_UP(len, PAGE_SIZE); @@ -527,7 +527,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des return -ERESTARTSYS; repeat: - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (!pipe->writers) return 0; @@ -820,7 +820,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, if (signal_pending(current)) break; - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { ret = 0; if (!pipe->writers) goto out; @@ -968,7 +968,7 @@ static ssize_t do_splice_read(struct file *in, loff_t *ppos, return 0; /* Don't try to read more the pipe has space for. 
*/ - p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); + p_space = pipe->max_usage - pipe_buf_usage(pipe); len = min_t(size_t, len, p_space << PAGE_SHIFT); if (unlikely(len > MAX_RW_COUNT)) @@ -1080,7 +1080,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, more = sd->flags & SPLICE_F_MORE; sd->flags |= SPLICE_F_MORE; - WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); + WARN_ON_ONCE(!pipe_is_empty(pipe)); while (len) { size_t read_len; @@ -1268,7 +1268,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; @@ -1652,13 +1652,13 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_empty(pipe->head, pipe->tail)) + if (!pipe_is_empty(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -1688,13 +1688,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. 
*/ - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (pipe_is_full(pipe)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4d0a2267e6efc..b698758000f8b 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -208,6 +208,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } +/** + * pipe_is_full - Return true if the pipe is full + * @pipe: the pipe + */ +static inline bool pipe_is_full(const struct pipe_inode_info *pipe) +{ + return pipe_full(pipe->head, pipe->tail, pipe->max_usage); +} + +/** + * pipe_is_empty - Return true if the pipe is empty + * @pipe: the pipe + */ +static inline bool pipe_is_empty(const struct pipe_inode_info *pipe) +{ + return pipe_empty(pipe->head, pipe->tail); +} + +/** + * pipe_buf_usage - Return how many pipe buffers are in use + * @pipe: the pipe + */ +static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe) +{ + return pipe_occupancy(pipe->head, pipe->tail); +} + /** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access diff --git a/mm/filemap.c b/mm/filemap.c index d4564a79eb353..2974691fdfad2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2897,8 +2897,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, size = min(size, folio_size(folio) - offset); offset %= PAGE_SIZE; - while (spliced < size && - !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (spliced < size && !pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced); @@ -2955,7 +2954,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, iocb.ki_pos = *ppos; /* Work out how much data we 
can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3015,7 +3014,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) goto out; } diff --git a/mm/shmem.c b/mm/shmem.c index 4ea6109a80431..20032a333d80c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3487,7 +3487,7 @@ static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, size = min_t(size_t, size, PAGE_SIZE - offset); - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + if (!pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); *buf = (struct pipe_buffer) { @@ -3514,7 +3514,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, int error = 0; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3601,7 +3601,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) break; cond_resched(); From d048c84bc1d6b831ca4e3381a16fb616ad96d8db Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Mar 2025 09:28:26 +0100 Subject: [PATCH 336/503] wifi: rework MAINTAINERS entries a bit Since I really don't want to be CC'ed on every patch add X: entries for all the drivers that are otherwise covered. In some cases, add a bit more to drivers that have other entries, mostly for the vendor directories, but for libertas also add libertas_tf. 
While at it, also add all nl80211-related (vendor) UAPI header files to the nl80211 entry. Link: https://patch.msgid.link/20250306092831.f7fdfe7df7b2.I7c86da443038af32e9bcbaa5f53b1e4128a0d1f9@changeid Signed-off-by: Johannes Berg --- MAINTAINERS | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 73a6c34692740..e989ca218d379 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -124,6 +124,7 @@ F: include/net/ieee80211_radiotap.h F: include/net/iw_handler.h F: include/net/wext.h F: include/uapi/linux/nl80211.h +N: include/uapi/linux/nl80211-.* F: include/uapi/linux/wireless.h F: net/wireless/ @@ -514,7 +515,7 @@ F: drivers/hwmon/adm1029.c ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/admtek/adm8211.* +F: drivers/net/wireless/admtek/ ADP1050 HARDWARE MONITOR DRIVER M: Radu Sabau @@ -6195,7 +6196,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan -F: drivers/net/wireless/st/cw1200/ +F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h CX18 VIDEO4LINUX DRIVER @@ -13983,6 +13984,7 @@ MARVELL LIBERTAS WIRELESS DRIVER L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ +F: drivers/net/wireless/marvell/libertas_tf/ MARVELL MACCHIATOBIN SUPPORT M: Russell King @@ -15652,7 +15654,7 @@ M: Ajay Singh M: Claudiu Beznea L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/microchip/wilc1000/ +F: drivers/net/wireless/microchip/ MICROSEMI MIPS SOCS M: Alexandre Belloni @@ -16438,6 +16440,23 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ +X: drivers/net/wireless/ath/ +X: drivers/net/wireless/broadcom/ +X: drivers/net/wireless/intel/ +X: drivers/net/wireless/intersil/ +X: 
drivers/net/wireless/marvell/ +X: drivers/net/wireless/mediatek/mt76/ +X: drivers/net/wireless/mediatek/mt7601u/ +X: drivers/net/wireless/microchip/ +X: drivers/net/wireless/purelifi/ +X: drivers/net/wireless/quantenna/ +X: drivers/net/wireless/ralink/ +X: drivers/net/wireless/realtek/ +X: drivers/net/wireless/rsi/ +X: drivers/net/wireless/silabs/ +X: drivers/net/wireless/st/ +X: drivers/net/wireless/ti/ +X: drivers/net/wireless/zydas/ NETWORKING [DSA] M: Andrew Lunn @@ -17822,7 +17841,7 @@ M: Christian Lamparter L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 -F: drivers/net/wireless/intersil/p54/ +F: drivers/net/wireless/intersil/ PACKET SOCKETS M: Willem de Bruijn @@ -19099,7 +19118,7 @@ PURELIFI PLFXLC DRIVER M: Srinivasan Raju L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/purelifi/plfxlc/ +F: drivers/net/wireless/purelifi/ PVRUSB2 VIDEO4LINUX DRIVER M: Mike Isely @@ -19650,7 +19669,7 @@ M: Igor Mitsyanko R: Sergey Matyukevich L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/quantenna +F: drivers/net/wireless/quantenna/ RADEON and AMDGPU DRM DRIVERS M: Alex Deucher @@ -19731,7 +19750,7 @@ RALINK RT2X00 WIRELESS LAN DRIVER M: Stanislaw Gruszka L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/ralink/rt2x00/ +F: drivers/net/wireless/ralink/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe @@ -21698,7 +21717,7 @@ SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml -F: drivers/net/wireless/silabs/wfx/ +F: drivers/net/wireless/silabs/ SILICON MOTION SM712 FRAME BUFFER DRIVER M: Sudip Mukherjee @@ -26198,7 +26217,7 @@ F: mm/zbud.c ZD1211RW WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/zydas/zd1211rw/ +F: drivers/net/wireless/zydas/ ZD1301 MEDIA DRIVER L: linux-media@vger.kernel.org From 
bbb18f7e23a3f5f56d5c8b4ee0f78f00edb3b1b2 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 6 Mar 2025 12:25:46 +0200 Subject: [PATCH 337/503] wifi: iwlwifi: pcie: Fix TSO preparation The allocation of the scatter gather data structure should be done based on the number of memory chunks that need to be mapped, and it is not dependent on the overall payload length. Fix it. In addition, as the skb_to_sgvec() function returns an 'int' do not assign it to an 'unsigned int' as otherwise the error check would be useless. Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306122425.8c0e23a3d583.I3cb4d6768c9d28ce3da6cd0a6c65466176cfc1ee@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 7b6071a59b694..7c1dd5cc084ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1869,12 +1869,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, unsigned int offset) { struct sg_table *sgt; - unsigned int n_segments; + unsigned int n_segments = skb_shinfo(skb)->nr_frags + 1; + int orig_nents; if (WARN_ON_ONCE(skb_has_frag_list(skb))) return NULL; - n_segments = DIV_ROUND_UP(skb->len - offset, skb_shinfo(skb)->gso_size); *hdr = iwl_pcie_get_page_hdr(trans, hdr_room + __alignof__(struct sg_table) + sizeof(struct sg_table) + @@ -1889,11 +1889,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, n_segments); /* Only map the data, not the header (it is copied to the TSO page) */ - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, - skb->len - offset); - if (WARN_ON_ONCE(sgt->orig_nents <= 0)) + orig_nents = 
skb_to_sgvec(skb, sgt->sgl, offset, skb->len - offset); + if (WARN_ON_ONCE(orig_nents <= 0)) return NULL; + sgt->orig_nents = orig_nents; + /* And map the entire SKB */ if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0) return NULL; From b8c8a03e9b7bfc06f366b75daf3d0812400e7123 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 6 Mar 2025 12:25:47 +0200 Subject: [PATCH 338/503] wifi: iwlwifi: mvm: fix PNVM timeout for non-MSI-X platforms When MSI-X is not enabled, we mask all the interrupts in the interrupt handler and re-enable them when the interrupt thread runs. If STATUS_INT_ENABLED is not set, we won't re-enable in the thread. In order to get the ALIVE interrupt, we allow the ALIVE interrupt itself, and RX as well in order to receive the ALIVE notification (which is received as an RX from the firmware). The problem is that STATUS_INT_ENABLED is clear until the op_mode calls trans_fw_alive which means that until trans_fw_alive is called, any notification from the firmware will not be received. This became a problem when we inserted the pnvm_load exactly between the ALIVE and trans_fw_alive. Fix that by calling trans_fw_alive before loading the PNVM. This will allow to get the notification from the firmware about PNVM load being complete and continue the flow normally. This didn't happen on MSI-X because we don't disable the interrupts in the ISR when MSI-X is available. The error in the log looks like this: iwlwifi 0000:00:03.0: Timeout waiting for PNVM load! iwlwifi 0000:00:03.0: Failed to start RT ucode: -110 iwlwifi 0000:00:03.0: WRT: Collecting data: ini trigger 13 fired (delay=0ms).
Fixes: 70d3ca86b025 ("iwlwifi: mvm: ring the doorbell and wait for PNVM load completion") Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306122425.0f2cf207aae1.I025d8f724b44f52eadf6c19069352eb9275613a8@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index df49dd2e2026d..d10877856049a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, /* if reached this point, Alive notification was received */ iwl_mei_alive_notif(true); + iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); + ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait, &mvm->fw->ucode_capa); if (ret) { @@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, return ret; } - iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); - /* * Note: all the queues are enabled as part of the interface * initialization, but in firmware restart scenarios they From 1801a94299a5c7fc1a6825e92e1ce0dc7099faa9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:25:48 +0200 Subject: [PATCH 339/503] wifi: iwlwifi: trans: cancel restart work on op mode leave If the restart work happens to run after the opmode left (i.e. called iwl_trans_op_mode_leave), then the opmode memory (including its mutex) is likely to be freed already, and trans->opmode is NULL. 
Although the hw is stopped in that stage, which means that this restart got aborted (i.e. STATUS_RESET_PENDING will be cleared), it still can access trans->opmode (NULL pointer dereference) or the opmode's memory (which is freed). Fix this by canceling the restart wk in iwl_trans_op_mode_leave. Also make sure that the restart wk is really aborted. Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306122425.801301ba1b8b.I6f6143f550b6335b699920c5d4b2b78449607a96@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 49c8507d1a6b1..47854a36413e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -403,6 +403,8 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); + cancel_work_sync(&trans->restart.wk); + trans->op_mode = NULL; trans->state = IWL_TRANS_NO_FW; From 43e04077170799d0e6289f3e928f727e401b3d79 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 6 Mar 2025 12:37:55 +0200 Subject: [PATCH 340/503] wifi: mac80211: flush the station before moving it to UN-AUTHORIZED state We first want to flush the station to make sure we no longer have any frames being Tx by the station before the station is moved to un-authorized state. Failing to do that will lead to races: a frame may be sent after the station's state has been changed. Since the API clearly states that the driver can't fail the sta_state() transition down the list of states, we can easily flush the station first, and only then call the driver's sta_state().
Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.450bc40e8b04.I636ba96843c77f13309c15c9fd6eb0c5a52a7976@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f83268fa9f928..caa3d0236b5ec 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -1335,9 +1335,13 @@ static int _sta_info_move_state(struct sta_info *sta, sta->sta.addr, new_state); /* notify the driver before the actual changes so it can - * fail the transition + * fail the transition if the state is increasing. + * The driver is required not to fail when the transition + * is decreasing the state, so first, do all the preparation + * work and only then, notify the driver. 
*/ - if (test_sta_flag(sta, WLAN_STA_INSERTED)) { + if (new_state > sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) @@ -1413,6 +1417,16 @@ static int _sta_info_move_state(struct sta_info *sta, break; } + if (new_state < sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { + int err = drv_sta_state(sta->local, sta->sdata, sta, + sta->sta_state, new_state); + + WARN_ONCE(err, + "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n", + err); + } + sta->sta_state = new_state; return 0; From 20d5a0b9cd0ccb32e886cf6baecf14936325bf10 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:37:56 +0200 Subject: [PATCH 341/503] wifi: mac80211: don't queue sdata::work for a non-running sdata The worker really shouldn't be queued for a non-running interface. Also, if ieee80211_setup_sdata is called between queueing and executing the wk, it will be initialized, which will corrupt wiphy_work_list. 
Fixes: f8891461a277 ("mac80211: do not start any work during reconfigure flow") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306123626.1e02caf82640.I4949e71ed56e7186ed4968fa9ddff477473fa2f4@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7f02bd5891eb9..fdda14c08e2b1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * utilities for mac80211 */ @@ -2193,8 +2193,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reconfig_roc(local); /* Requeue all works */ - list_for_each_entry(sdata, &local->interfaces, list) - wiphy_work_queue(local->hw.wiphy, &sdata->work); + list_for_each_entry(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + wiphy_work_queue(local->hw.wiphy, &sdata->work); + } } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, From 2e85829ac7fbbd57b93f6cd334b6d448c9ce9db3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Mar 2025 12:37:57 +0200 Subject: [PATCH 342/503] wifi: nl80211: fix assoc link handling The refactoring of the assoc link handling in order to support multi-link reconfiguration broke the setting of the assoc link ID, and thus resulted in the wrong BSS "use_for" value being selected. Fix that for both association and ML reconfiguration. 
Fixes: 720fa448f5a7 ("wifi: nl80211: Split the links handling of an association request") Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.7b233d769c32.I62fd04a8667dd55cedb9a1c0414cc92dd098da75@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e87267fbb442e..aac0e7298dc7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11123,6 +11123,7 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device static int nl80211_process_links(struct cfg80211_registered_device *rdev, struct cfg80211_assoc_link *links, + int assoc_link_id, const u8 *ssid, int ssid_len, struct genl_info *info) { @@ -11153,7 +11154,7 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev, } links[link_id].bss = nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - link_id, link_id); + assoc_link_id, link_id); if (IS_ERR(links[link_id].bss)) { err = PTR_ERR(links[link_id].bss); links[link_id].bss = NULL; @@ -11350,8 +11351,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); ap_addr = req.ap_mld_addr; - err = nl80211_process_links(rdev, req.links, ssid, ssid_len, - info); + err = nl80211_process_links(rdev, req.links, req.link_id, + ssid, ssid_len, info); if (err) goto free; @@ -16506,7 +16507,10 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { - err = nl80211_process_links(rdev, links, NULL, 0, info); + err = nl80211_process_links(rdev, links, + /* mark as MLO, but not assoc */ + IEEE80211_MLD_MAX_NUM_LINKS, + NULL, 0, info); if (err) return err; From 9a267ce4a3fca93a34a8881046f97bcf472228c8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: 
Thu, 6 Mar 2025 12:37:58 +0200 Subject: [PATCH 343/503] wifi: mac80211: fix SA Query processing in MLO When MLO is used and SA Query processing isn't done by userspace (e.g. wpa_supplicant w/o CONFIG_OCV), then the mac80211 code kicks in but uses the wrong addresses. Fix them. Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.bab48bb49061.I9391b22f1360d20ac8c4e92604de23f27696ba8f@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e28efe4203c0..0659ec892ec6c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include @@ -3329,8 +3329,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) || - !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) { + if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) || + !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) { /* Not from the current AP or not associated yet. 
*/ return; } @@ -3346,9 +3346,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->hw.extra_tx_headroom); resp = skb_put_zero(skb, 24); - memcpy(resp->da, mgmt->sa, ETH_ALEN); + memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); - memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); From 72d520476a2fab6f3489e8388ab524985d6c4b90 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:37:59 +0200 Subject: [PATCH 344/503] wifi: cfg80211: cancel wiphy_work before freeing wiphy A wiphy_work can be queued from the moment the wiphy is allocated and initialized (i.e. wiphy_new_nm). When a wiphy_work is queued, the rdev::wiphy_work is getting queued. If wiphy_free is called before the rdev::wiphy_work had a chance to run, the wiphy memory will be freed, and then when it eventually gets to run it'll use invalid memory. Fix this by canceling the work before freeing the wiphy. 
Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306123626.efd1d19f6e07.I48229f96f4067ef73f5b87302335e2fd750136c9@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 12b780de8779c..828e298726335 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1191,6 +1191,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + WARN_ON(!list_empty(&rdev->wiphy_work_list)); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + cancel_work_sync(&rdev->wiphy_work); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(&reg->list); From 502843396ec2a3eb4f58a2e4618a4a85fc5e0f46 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 3 Mar 2025 19:40:29 +0200 Subject: [PATCH 345/503] thunderbolt: Prevent use-after-free in resume from hibernate Kenneth noticed that his laptop crashes randomly when resuming from hibernate if there is device connected and display tunneled. I was able to reproduce this as well with the following steps: 1. Boot the system up, nothing connected. 2. Connect Thunderbolt 4 dock to the host. 3. Connect monitor to the Thunderbolt 4 dock. 4. Verify that there is picture on the screen. 5. Enter hibernate. 6. Exit hibernate. 7. Wait for the system to resume. Expectation: System resumes just fine, the connected monitor still shows screen. Actual result: There is crash during resume, screen is blank. 
What happens is that during resume from hibernate we tear down any existing tunnels created by the boot kernel and this ends up calling tb_dp_dprx_stop() which calls tb_tunnel_put() dropping the reference count to zero even though we never called tb_dp_dprx_start() for it (we never do that for discovery). This makes the discovered DP tunnel memory to be released and any access after that causes use-after-free and possible crash. Fix this so that we only stop DPRX flow if it has been started in the first place. Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/linux-usb/8e175721-806f-45d6-892a-bd3356af80c9@panix.com/ Cc: stable@vger.kernel.org Fixes: d6d458d42e1e ("thunderbolt: Handle DisplayPort tunnel activation asynchronously") Reviewed-by: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 11 ++++++++--- drivers/thunderbolt/tunnel.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 8229a6fbda5ab..717b31d787289 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -1009,6 +1009,8 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel) */ tb_tunnel_get(tunnel); + tunnel->dprx_started = true; + if (tunnel->callback) { tunnel->dprx_timeout = dprx_timeout_to_ktime(dprx_timeout); queue_delayed_work(tunnel->tb->wq, &tunnel->dprx_work, 0); @@ -1021,9 +1023,12 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel) static void tb_dp_dprx_stop(struct tb_tunnel *tunnel) { - tunnel->dprx_canceled = true; - cancel_delayed_work(&tunnel->dprx_work); - tb_tunnel_put(tunnel); + if (tunnel->dprx_started) { + tunnel->dprx_started = false; + tunnel->dprx_canceled = true; + cancel_delayed_work(&tunnel->dprx_work); + tb_tunnel_put(tunnel); + } } static int tb_dp_activate(struct tb_tunnel *tunnel, bool active) diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h index 7f6d3a18a41e8..8a0a0cb21a895 100644 --- 
a/drivers/thunderbolt/tunnel.h +++ b/drivers/thunderbolt/tunnel.h @@ -63,6 +63,7 @@ enum tb_tunnel_state { * @allocated_down: Allocated downstream bandwidth (only for USB3) * @bw_mode: DP bandwidth allocation mode registers can be used to * determine consumed and allocated bandwidth + * @dprx_started: DPRX negotiation was started (tb_dp_dprx_start() was called for it) * @dprx_canceled: Was DPRX capabilities read poll canceled * @dprx_timeout: If set DPRX capabilities read poll work will timeout after this passes * @dprx_work: Worker that is scheduled to poll completion of DPRX capabilities read @@ -100,6 +101,7 @@ struct tb_tunnel { int allocated_up; int allocated_down; bool bw_mode; + bool dprx_started; bool dprx_canceled; ktime_t dprx_timeout; struct delayed_work dprx_work; From 14cb5d83068ecf15d2da6f7d0e9ea9edbcbc0457 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Fri, 7 Mar 2025 00:28:46 +0000 Subject: [PATCH 346/503] x86/amd_nb: Use rdmsr_safe() in amd_get_mmconfig_range() Xen doesn't offer MSR_FAM10H_MMIO_CONF_BASE to all guests. This results in the following warning: unchecked MSR access error: RDMSR from 0xc0010058 at rIP: 0xffffffff8101d19f (xen_do_read_msr+0x7f/0xa0) Call Trace: xen_read_msr+0x1e/0x30 amd_get_mmconfig_range+0x2b/0x80 quirk_amd_mmconfig_area+0x28/0x100 pnp_fixup_device+0x39/0x50 __pnp_add_device+0xf/0x150 pnp_add_device+0x3d/0x100 pnpacpi_add_device_handler+0x1f9/0x280 acpi_ns_get_device_callback+0x104/0x1c0 acpi_ns_walk_namespace+0x1d0/0x260 acpi_get_devices+0x8a/0xb0 pnpacpi_init+0x50/0x80 do_one_initcall+0x46/0x2e0 kernel_init_freeable+0x1da/0x2f0 kernel_init+0x16/0x1b0 ret_from_fork+0x30/0x50 ret_from_fork_asm+0x1b/0x30 based on quirks for a "PNP0c01" device. Treating MMCFG as disabled is the right course of action, so no change is needed there. This was most likely exposed by fixing the Xen MSR accessors to not be silently-safe. 
Fixes: 3fac3734c43a ("xen/pv: support selecting safe/unsafe msr accesses") Signed-off-by: Andrew Cooper Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250307002846.3026685-1-andrew.cooper3@citrix.com --- arch/x86/kernel/amd_nb.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 11fac09e3a8cb..67e773744edb2 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -143,7 +143,6 @@ bool __init early_is_amd_nb(u32 device) struct resource *amd_get_mmconfig_range(struct resource *res) { - u32 address; u64 base, msr; unsigned int segn_busn_bits; @@ -151,13 +150,11 @@ struct resource *amd_get_mmconfig_range(struct resource *res) boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) return NULL; - /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + /* Assume CPUs from Fam10h have mmconfig, although not all VMs do */ + if (boot_cpu_data.x86 < 0x10 || + rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr)) return NULL; - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - /* mmconfig is not enabled */ if (!(msr & FAM10H_MMIO_CONF_ENABLE)) return NULL; From ac7c06acaa3738b38e83815ac0f07140ad320f13 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 6 Mar 2025 19:17:21 +1100 Subject: [PATCH 347/503] virt: sev-guest: Allocate request data dynamically Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") narrowed the command mutex scope to snp_send_guest_request(). However, GET_REPORT, GET_DERIVED_KEY, and GET_EXT_REPORT share the req structure in snp_guest_dev. Without the mutex protection, concurrent requests can overwrite each other's data. Fix it by dynamically allocating the request structure. 
Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Closes: https://github.com/AMDESE/AMDSEV/issues/265 Reported-by: andreas.stuehrk@yaxi.tech Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-2-aik@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 264b6523fe52f..23ac177472beb 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -38,12 +38,6 @@ struct snp_guest_dev { struct miscdevice misc; struct snp_msg_desc *msg_desc; - - union { - struct snp_report_req report; - struct snp_derived_key_req derived_key; - struct snp_ext_report_req ext_report; - } req; }; /* @@ -71,7 +65,7 @@ struct snp_req_resp { static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_report_req *report_req = &snp_dev->req.report; + struct snp_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -80,6 +74,10 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!arg->req_data || !arg->resp_data) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_user(report_req, (void __user *)arg->req_data, sizeof(*report_req))) return -EFAULT; @@ -116,7 +114,7 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_derived_key_req *derived_key_req = &snp_dev->req.derived_key; + struct snp_derived_key_req 
*derived_key_req __free(kfree) = NULL; struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; @@ -136,6 +134,10 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (sizeof(buf) < resp_len) return -ENOMEM; + derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); + if (!derived_key_req) + return -ENOMEM; + if (copy_from_user(derived_key_req, (void __user *)arg->req_data, sizeof(*derived_key_req))) return -EFAULT; @@ -168,7 +170,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_req_resp *io) { - struct snp_ext_report_req *report_req = &snp_dev->req.ext_report; + struct snp_ext_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -178,6 +180,10 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_sockptr(report_req, io->req_data, sizeof(*report_req))) return -EFAULT; From 3e385c0d6ce88ac9916dcf84267bd5855d830748 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 7 Mar 2025 12:37:00 +1100 Subject: [PATCH 348/503] virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex Compared to the SNP Guest Request, the "Extended" version adds data pages for receiving certificates. If not enough pages provided, the HV can report to the VM how much is needed so the VM can reallocate and repeat. 
Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") moved handling of the allocated/desired pages number out of scope of said mutex and created a possibility for a race (multiple instances trying to trigger Extended request in a VM) as there is just one instance of snp_msg_desc per /dev/sev-guest and no locking other than snp_cmd_mutex. Fix the issue by moving the data blob/size and the GHCB input struct (snp_req_data) into snp_guest_req which is allocated on stack now and accessed by the GHCB caller under that mutex. Stop allocating SEV_FW_BLOB_MAX_SIZE in snp_msg_alloc() as only one of four callers needs it. Free the received blob in get_ext_report() right after it is copied to the userspace. Possible future users of snp_send_guest_request() are likely to have different ideas about the buffer size anyways. Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikunj A Dadhania Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-3-aik@amd.com --- arch/x86/coco/sev/core.c | 23 ++++++----------- arch/x86/include/asm/sev.h | 6 ++--- drivers/virt/coco/sev-guest/sev-guest.c | 34 ++++++++++++++++++++----- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 82492efc5d949..96c7bc698e6b6 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -2853,19 +2853,8 @@ struct snp_msg_desc *snp_msg_alloc(void) if (!mdesc->response) goto e_free_request; - mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); - if (!mdesc->certs_data) - goto e_free_response; - - /* initial the input address for guest request */ - mdesc->input.req_gpa = __pa(mdesc->request); - mdesc->input.resp_gpa = __pa(mdesc->response); - mdesc->input.data_gpa = __pa(mdesc->certs_data); - return mdesc; -e_free_response: - free_shared_pages(mdesc->response, 
sizeof(struct snp_guest_msg)); e_free_request: free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); e_unmap: @@ -2885,7 +2874,6 @@ void snp_msg_free(struct snp_msg_desc *mdesc) kfree(mdesc->ctx); free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); iounmap((__force void __iomem *)mdesc->secrets); memset(mdesc, 0, sizeof(*mdesc)); @@ -3054,7 +3042,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(req, &mdesc->input, rio); + rc = snp_issue_guest_request(req, &req->input, rio); switch (rc) { case -ENOSPC: /* @@ -3064,7 +3052,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r * order to increment the sequence number and thus avoid * IV reuse. */ - override_npages = mdesc->input.data_npages; + override_npages = req->input.data_npages; req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; /* @@ -3120,7 +3108,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r } if (override_npages) - mdesc->input.data_npages = override_npages; + req->input.data_npages = override_npages; return rc; } @@ -3158,6 +3146,11 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req */ memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); + /* Initialize the input address for guest request */ + req->input.req_gpa = __pa(mdesc->request); + req->input.resp_gpa = __pa(mdesc->response); + req->input.data_gpa = req->certs_data ? 
__pa(req->certs_data) : 0; + rc = __handle_guest_request(mdesc, req, rio); if (rc) { if (rc == -EIO && diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 1581246491b54..ba7999f66abe6 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -203,6 +203,9 @@ struct snp_guest_req { unsigned int vmpck_id; u8 msg_version; u8 msg_type; + + struct snp_req_data input; + void *certs_data; }; /* @@ -263,9 +266,6 @@ struct snp_msg_desc { struct snp_guest_msg secret_request, secret_response; struct snp_secrets_page *secrets; - struct snp_req_data input; - - void *certs_data; struct aesgcm_ctx *ctx; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 23ac177472beb..70fbc9a3e703d 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,6 +176,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + struct page *page; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -209,8 +210,20 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques * the host. If host does not supply any certs in it, then copy * zeros to indicate that certificate data was not provided. 
*/ - memset(mdesc->certs_data, 0, report_req->certs_len); npages = report_req->certs_len >> PAGE_SHIFT; + page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, + get_order(report_req->certs_len)); + if (!page) + return -ENOMEM; + + req.certs_data = page_address(page); + ret = set_memory_decrypted((unsigned long)req.certs_data, npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(report_req->certs_len)); + return -EFAULT; + } + cmd: /* * The intermediate response buffer is used while decrypting the @@ -219,10 +232,12 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques */ resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize; report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); - if (!report_resp) - return -ENOMEM; + if (!report_resp) { + ret = -ENOMEM; + goto e_free_data; + } - mdesc->input.data_npages = npages; + req.input.data_npages = npages; req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; @@ -237,7 +252,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques /* If certs length is invalid then copy the returned length */ if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { - report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT; + report_req->certs_len = req.input.data_npages << PAGE_SHIFT; if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req))) ret = -EFAULT; @@ -246,7 +261,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (ret) goto e_free; - if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) { + if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) { ret = -EFAULT; goto e_free; } @@ -256,6 +271,13 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques e_free: kfree(report_resp); +e_free_data: + if (npages) { + if (set_memory_encrypted((unsigned 
long)req.certs_data, npages)) + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + else + __free_pages(page, get_order(report_req->certs_len)); + } return ret; } From 986c2e9ca818b0b74cfc737517549fd0b80ff15d Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Mar 2025 00:16:01 +0100 Subject: [PATCH 349/503] drm/panic: use `div_ceil` to clean Clippy warning Starting with the upcoming Rust 1.86.0 (to be released 2025-04-03), Clippy warns: error: manually reimplementing `div_ceil` --> drivers/gpu/drm/drm_panic_qr.rs:548:26 | 548 | let pad_offset = (offset + 7) / 8; | ^^^^^^^^^^^^^^^^ help: consider using `.div_ceil()`: `offset.div_ceil(8)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#manual_div_ceil And similarly for `stride`. Thus apply the suggestion to both. The behavior (and thus codegen) is not exactly equivalent [1][2], since `div_ceil()` returns the right value for the values that currently would overflow. Link: https://github.com/rust-lang/rust-clippy/issues/14333 [1] Link: https://godbolt.org/z/dPq6nGnv3 [2] Signed-off-by: Miguel Ojeda Fixes: cb5164ac43d0 ("drm/panic: Add a QR code panic screen") Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). 
Reviewed-by: Alice Ryhl Reviewed-by: Jocelyn Falempe Signed-off-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250301231602.917580-1-ojeda@kernel.org --- drivers/gpu/drm/drm_panic_qr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index ef2d490965ba2..56692c6be2199 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -545,7 +545,7 @@ impl EncodedMsg<'_> { } self.push(&mut offset, (MODE_STOP, 4)); - let pad_offset = (offset + 7) / 8; + let pad_offset = offset.div_ceil(8); for i in pad_offset..self.version.max_data() { self.data[i] = PADDING[(i & 1) ^ (pad_offset & 1)]; } @@ -659,7 +659,7 @@ struct QrImage<'a> { impl QrImage<'_> { fn new<'a, 'b>(em: &'b EncodedMsg<'b>, qrdata: &'a mut [u8]) -> QrImage<'a> { let width = em.version.width(); - let stride = (width + 7) / 8; + let stride = width.div_ceil(8); let data = qrdata; let mut qr_image = QrImage { From cba3b86974a3388b12130654809e50cd19294849 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Mar 2025 00:16:02 +0100 Subject: [PATCH 350/503] drm/panic: fix overindented list items in documentation Starting with the upcoming Rust 1.86.0 (to be released 2025-04-03), Clippy warns: error: doc list item overindented --> drivers/gpu/drm/drm_panic_qr.rs:914:5 | 914 | /// will be encoded as binary segment, otherwise it will be encoded | ^^^ help: try using ` ` (2 spaces) | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#doc_overindented_list_items The overindentation is slightly hard to notice, since all the items start with a backquote that makes it look OK, but it is there. Thus fix it. Signed-off-by: Miguel Ojeda Fixes: cb5164ac43d0 ("drm/panic: Add a QR code panic screen") Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). 
Reviewed-by: Jocelyn Falempe Reviewed-by: Alice Ryhl Signed-off-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250301231602.917580-2-ojeda@kernel.org --- drivers/gpu/drm/drm_panic_qr.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 56692c6be2199..08b31d75c24a1 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -911,16 +911,16 @@ impl QrImage<'_> { /// /// * `url`: The base URL of the QR code. It will be encoded as Binary segment. /// * `data`: A pointer to the binary data, to be encoded. if URL is NULL, it -/// will be encoded as binary segment, otherwise it will be encoded -/// efficiently as a numeric segment, and appended to the URL. +/// will be encoded as binary segment, otherwise it will be encoded +/// efficiently as a numeric segment, and appended to the URL. /// * `data_len`: Length of the data, that needs to be encoded, must be less -/// than data_size. +/// than data_size. /// * `data_size`: Size of data buffer, it should be at least 4071 bytes to hold -/// a V40 QR code. It will then be overwritten with the QR code image. +/// a V40 QR code. It will then be overwritten with the QR code image. /// * `tmp`: A temporary buffer that the QR code encoder will use, to write the -/// segments and ECC. +/// segments and ECC. /// * `tmp_size`: Size of the temporary buffer, it must be at least 3706 bytes -/// long for V40. +/// long for V40. /// /// # Safety /// From 75ddcd5ad40ecd9fbc9f5a7a2ed0e1e74921db3c Mon Sep 17 00:00:00 2001 From: Hsin-chen Chuang Date: Fri, 28 Feb 2025 01:14:10 +0800 Subject: [PATCH 351/503] Bluetooth: btusb: Configure altsetting for HCI_USER_CHANNEL Automatically configure the altsetting for HCI_USER_CHANNEL when a SCO is connected. 
The motivation is to enable the HCI_USER_CHANNEL user to send out SCO data through USB Bluetooth chips, which is mainly used for bidirectional audio transfer (voice call). This was not capable because: - Per Bluetooth Core Spec v5, Vol 4, Part B, 2.1, the corresponding alternate setting should be set based on the air mode in order to transfer SCO data, but - The Linux Bluetooth HCI_USER_CHANNEL exposes the Bluetooth Host Controller Interface to the user space, which is something above the USB layer. The user space is not able to configure the USB alt while keeping the channel open. This patch intercepts the HCI_EV_SYNC_CONN_COMPLETE packets in btusb, extracts the air mode, and configures the alt setting in btusb. This patch is tested on ChromeOS devices. The USB Bluetooth models (CVSD, TRANS alt3 and alt6) could work without a customized kernel. Fixes: b16b327edb4d ("Bluetooth: btusb: add sysfs attribute to control USB alt setting") Signed-off-by: Hsin-chen Chuang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/Kconfig | 12 ++++++++++++ drivers/bluetooth/btusb.c | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 4ab32abf0f486..7771edf54fb3f 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -56,6 +56,18 @@ config BT_HCIBTUSB_POLL_SYNC Say Y here to enable USB poll_sync for Bluetooth USB devices by default. +config BT_HCIBTUSB_AUTO_ISOC_ALT + bool "Automatically adjust alternate setting for Isoc endpoints" + depends on BT_HCIBTUSB + default y if CHROME_PLATFORMS + help + Say Y here to automatically adjusting the alternate setting for + HCI_USER_CHANNEL whenever a SCO link is established. + + When enabled, btusb intercepts the HCI_EV_SYNC_CONN_COMPLETE packets + and configures isoc endpoint alternate setting automatically when + HCI_USER_CHANNEL is in use. 
+ config BT_HCIBTUSB_BCM bool "Broadcom protocol support" depends on BT_HCIBTUSB diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2a8d91963c63f..a0fc465458b2f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -34,6 +34,7 @@ static bool force_scofix; static bool enable_autosuspend = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTOSUSPEND); static bool enable_poll_sync = IS_ENABLED(CONFIG_BT_HCIBTUSB_POLL_SYNC); static bool reset = true; +static bool auto_isoc_alt = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTO_ISOC_ALT); static struct usb_driver btusb_driver; @@ -1085,6 +1086,42 @@ static inline void btusb_free_frags(struct btusb_data *data) spin_unlock_irqrestore(&data->rxlock, flags); } +static void btusb_sco_connected(struct btusb_data *data, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + struct hci_ev_sync_conn_complete *ev = + (void *) skb->data + sizeof(*hdr); + struct hci_dev *hdev = data->hdev; + unsigned int notify_air_mode; + + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) + return; + + if (skb->len < sizeof(*hdr) || hdr->evt != HCI_EV_SYNC_CONN_COMPLETE) + return; + + if (skb->len != sizeof(*hdr) + sizeof(*ev) || ev->status) + return; + + switch (ev->air_mode) { + case BT_CODEC_CVSD: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_CVSD; + break; + + case BT_CODEC_TRANSPARENT: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_TRANSP; + break; + + default: + return; + } + + bt_dev_info(hdev, "enabling SCO with air mode %u", ev->air_mode); + data->sco_num = 1; + data->air_mode = notify_air_mode; + schedule_work(&data->work); +} + static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) { if (data->intr_interval) { @@ -1092,6 +1129,10 @@ static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) schedule_delayed_work(&data->rx_work, 0); } + /* Configure altsetting for HCI_USER_CHANNEL on SCO connected */ + if (auto_isoc_alt && hci_dev_test_flag(data->hdev, HCI_USER_CHANNEL)) + 
btusb_sco_connected(data, skb); + return data->recv_event(data->hdev, skb); } From 8d74c9106be8da051b22f0cd81e665f17d51ba5d Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 27 Feb 2025 23:28:15 +0200 Subject: [PATCH 352/503] Bluetooth: SCO: fix sco_conn refcounting on sco_conn_ready sco_conn refcount shall not be incremented a second time if the sk already owns the refcount, so hold only when adding new chan. Add sco_conn_hold() for clarity, as refcnt is never zero here due to the sco_conn_add(). Fixes SCO socket shutdown not actually closing the SCO connection. Fixes: ed9588554943 ("Bluetooth: SCO: remove the redundant sco_conn_put") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index aa7bfe26cb40f..ed6846864ea93 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn) kref_put(&conn->ref, sco_conn_free); } +static struct sco_conn *sco_conn_hold(struct sco_conn *conn) +{ + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + kref_get(&conn->ref); + return conn; +} + static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) { if (!conn) @@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn) bacpy(&sco_pi(sk)->src, &conn->hcon->src); bacpy(&sco_pi(sk)->dst, &conn->hcon->dst); + sco_conn_hold(conn); hci_conn_hold(conn->hcon); __sco_chan_add(conn, sk, parent); @@ -1411,8 +1420,10 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) struct sco_conn *conn; conn = sco_conn_add(hcon); - if (conn) + if (conn) { sco_conn_ready(conn); + sco_conn_put(conn); + } } else sco_conn_del(hcon, bt_to_errno(status)); } From 0bdd88971519cfa8a76d1a4dde182e74cfbd5d5c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 28 Feb 2025 13:12:54 -0500 Subject: [PATCH 353/503] 
Bluetooth: hci_event: Fix enabling passive scanning Passive scanning shall only be enabled when disconnecting LE links, otherwise it may result in triggering scanning when e.g. an ISO link disconnects: > HCI Event: LE Meta Event (0x3e) plen 29 LE Connected Isochronous Stream Established (0x19) Status: Success (0x00) Connection Handle: 257 CIG Synchronization Delay: 0 us (0x000000) CIS Synchronization Delay: 0 us (0x000000) Central to Peripheral Latency: 10000 us (0x002710) Peripheral to Central Latency: 10000 us (0x002710) Central to Peripheral PHY: LE 2M (0x02) Peripheral to Central PHY: LE 2M (0x02) Number of Subevents: 1 Central to Peripheral Burst Number: 1 Peripheral to Central Burst Number: 1 Central to Peripheral Flush Timeout: 2 Peripheral to Central Flush Timeout: 2 Central to Peripheral MTU: 320 Peripheral to Central MTU: 160 ISO Interval: 10.00 msec (0x0008) ... > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 257 Reason: Remote User Terminated Connection (0x13) < HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6 Extended scan: Enabled (0x01) Filter duplicates: Enabled (0x01) Duration: 0 msec (0x0000) Period: 0.00 sec (0x0000) Fixes: 9fcb18ef3acb ("Bluetooth: Introduce LE auto connect options") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2cc7a93063501..903b0b52692aa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_update_scan(hdev); } - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + /* Re-enable passive scanning if disconnected device is marked + * as 
auto-connectable. + */ + if (conn->type == LE_LINK) { + params = hci_conn_params_lookup(hdev, &conn->dst, + conn->dst_type); + if (params) { + switch (params->auto_connect) { + case HCI_AUTO_CONN_LINK_LOSS: + if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + break; + fallthrough; + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + hci_pend_le_list_del_init(params); + hci_pend_le_list_add(params, + &hdev->pend_le_conns); + hci_update_passive_scan(hdev); break; - fallthrough; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_list_del_init(params); - hci_pend_le_list_add(params, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); - break; - - default: - break; + default: + break; + } } } From ab6ab707a4d060a51c45fc13e3b2228d5f7c0b87 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Mar 2025 10:06:10 -0500 Subject: [PATCH 354/503] Revert "Bluetooth: hci_core: Fix sleeping function called from invalid context" This reverts commit 4d94f05558271654670d18c26c912da0c1c15549 which has problems (see [1]) and is no longer needed since 581dd2dc168f ("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating") has reworked the code where the original bug has been found. 
[1] Link: https://lore.kernel.org/linux-bluetooth/877c55ci1r.wl-tiwai@suse.de/T/#t Fixes: 4d94f0555827 ("Bluetooth: hci_core: Fix sleeping function called from invalid context") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 108 +++++++++++-------------------- net/bluetooth/hci_core.c | 10 ++- net/bluetooth/iso.c | 6 -- net/bluetooth/l2cap_core.c | 12 ++-- net/bluetooth/rfcomm/core.c | 6 -- net/bluetooth/sco.c | 12 ++-- 6 files changed, 57 insertions(+), 97 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f756fac95488a..6281063cbd8e4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -804,6 +804,7 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; +extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -2010,47 +2011,24 @@ struct hci_cb { char *name; - bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; -static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) -{ - struct hci_cb *cb, *cpy; - - rcu_read_lock(); - list_for_each_entry_rcu(cb, &hci_cb_list, list) { - if (cb->match && cb->match(conn)) { - cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); - if (!cpy) - break; - - *cpy = *cb; - INIT_LIST_HEAD(&cpy->list); - list_add_rcu(&cpy->list, list); - } - } - rcu_read_unlock(); -} - static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, 
*tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -2058,43 +2036,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) -{ - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - kfree(cb); - } - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); -} - static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) @@ -2102,11 +2059,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 
0x01 : 0x00; - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -2133,38 +2099,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e7ec12437c8b1..012fc107901a6 100644 --- 
a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2972,7 +2973,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_add_tail_rcu(&cb->list, &hci_cb_list); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -2982,8 +2985,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_del_rcu(&cb->list); - synchronize_rcu(); + mutex_lock(&hci_cb_list_lock); + list_del(&cb->list); + mutex_unlock(&hci_cb_list_lock); return 0; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 44acddf58a0cd..0cb52a3308bae 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2187,11 +2187,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return HCI_LM_ACCEPT; } -static bool iso_match(struct hci_conn *hcon) -{ - return hcon->type == ISO_LINK || hcon->type == LE_LINK; -} - static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -2373,7 +2368,6 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb iso_cb = { .name = "ISO", - .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b22078b679726..c27ea70f71e1e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7182,11 +7182,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -static bool l2cap_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK || hcon->type == LE_LINK; -} - static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -7194,6 +7189,9 @@ 
static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7258,6 +7256,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7565,7 +7566,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb l2cap_cb = { .name = "L2CAP", - .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4c56ca5a216c6..ad5177e3a69b7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused) return 0; } -static bool rfcomm_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK; -} - static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ed6846864ea93..5d1bc0d6aee03 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1407,13 +1407,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static bool sco_match(struct hci_conn *hcon) -{ - return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; -} - static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + 
BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { @@ -1430,6 +1428,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1455,7 +1456,6 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) static struct hci_cb sco_cb = { .name = "SCO", - .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; From 6914f7e2e25fac9d1d2b62c208eaa5f2bf810fe9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2025 23:00:16 +0100 Subject: [PATCH 355/503] x86/mm: Define PTRS_PER_PMD for assembly code too Andy reported the following build warning from head_32.S: In file included from arch/x86/kernel/head_32.S:29: arch/x86/include/asm/pgtable_32.h:59:5: error: "PTRS_PER_PMD" is not defined, evaluates to 0 [-Werror=undef] 59 | #if PTRS_PER_PMD > 1 The reason is that on 2-level i386 paging the folded in PMD's PTRS_PER_PMD constant is not defined in assembly headers, only in generic MM C headers. Instead of trying to fish out the definition from the generic headers, just define it - it even has a comment for it already... 
Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/Z8oa8AUVyi2HWfo9@gmail.com --- arch/x86/include/asm/pgtable-2level_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 7f6ccff0ba727..4a12c276b1812 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -23,17 +23,17 @@ typedef union { #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. + * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 From 966944f3711665db13e214fef6d02982c49bb972 Mon Sep 17 00:00:00 2001 From: Mitchell Levy Date: Fri, 7 Mar 2025 15:27:00 -0800 Subject: [PATCH 356/503] rust: lockdep: Remove support for dynamically allocated LockClassKeys Currently, dynamically allocated LockClassKeys can be used from the Rust side without having them registered. This is a soundness issue, so remove them. 
Fixes: 6ea5aa08857a ("rust: sync: introduce `LockClassKey`") Suggested-by: Alice Ryhl Signed-off-by: Mitchell Levy Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307232717.1759087-11-boqun.feng@gmail.com --- rust/kernel/sync.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 3498fb344dc93..16eab9138b2ba 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -30,28 +30,20 @@ pub struct LockClassKey(Opaque); unsafe impl Sync for LockClassKey {} impl LockClassKey { - /// Creates a new lock class key. - pub const fn new() -> Self { - Self(Opaque::uninit()) - } - pub(crate) fn as_ptr(&self) -> *mut bindings::lock_class_key { self.0.get() } } -impl Default for LockClassKey { - fn default() -> Self { - Self::new() - } -} - /// Defines a new static lock class and returns a pointer to it. #[doc(hidden)] #[macro_export] macro_rules! 
static_lock_class { () => {{ - static CLASS: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + static CLASS: $crate::sync::LockClassKey = + // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated + // lock_class_key + unsafe { ::core::mem::MaybeUninit::uninit().assume_init() }; &CLASS }}; } From b3c5ec8b79bf6bc49cc4850d0949d712830283d7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 7 Mar 2025 15:26:51 -0800 Subject: [PATCH 357/503] locking/rtmutex: Use the 'struct' keyword in kernel-doc comment Add the "struct" keyword to prevent a kernel-doc warning: rtmutex_common.h:67: warning: cannot understand function prototype: 'struct rt_wake_q_head ' Signed-off-by: Randy Dunlap Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://lore.kernel.org/r/20250307232717.1759087-2-boqun.feng@gmail.com --- kernel/locking/rtmutex_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index c38a2d2d4a7ee..78dd3d8c65544 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -59,8 +59,8 @@ struct rt_mutex_waiter { }; /** - * rt_wake_q_head - Wrapper around regular wake_q_head to support - * "sleeping" spinlocks on RT + * struct rt_wake_q_head - Wrapper around regular wake_q_head to support + * "sleeping" spinlocks on RT * @head: The regular wake_q_head for sleeping lock variants * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups */ From 85b2b9c16d053364e2004883140538e73b333cdb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 7 Mar 2025 15:26:52 -0800 Subject: [PATCH 358/503] locking/semaphore: Use wake_q to wake up processes outside lock critical section A circular lock dependency splat has been seen involving down_trylock(): ====================================================== WARNING: possible circular locking dependency detected 6.12.0-41.el10.s390x+debug 
------------------------------------------------------ dd/32479 is trying to acquire lock: 0015a20accd0d4f8 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0x26/0x90 but task is already holding lock: 000000017e461698 (&zone->lock){-.-.}-{2:2}, at: rmqueue_bulk+0xac/0x8f0 the existing dependency chain (in reverse order) is: -> #4 (&zone->lock){-.-.}-{2:2}: -> #3 (hrtimer_bases.lock){-.-.}-{2:2}: -> #2 (&rq->__lock){-.-.}-{2:2}: -> #1 (&p->pi_lock){-.-.}-{2:2}: -> #0 ((console_sem).lock){-.-.}-{2:2}: The console_sem -> pi_lock dependency is due to calling try_to_wake_up() while holding the console_sem raw_spinlock. This dependency can be broken by using wake_q to do the wakeup instead of calling try_to_wake_up() under the console_sem lock. This will also make the semaphore's raw_spinlock become a terminal lock without taking any further locks underneath it. The hrtimer_bases.lock is a raw_spinlock while zone->lock is a spinlock. The hrtimer_bases.lock -> zone->lock dependency happens via the debug_objects_fill_pool() helper function in the debugobjects code. 
-> #4 (&zone->lock){-.-.}-{2:2}: __lock_acquire+0xe86/0x1cc0 lock_acquire.part.0+0x258/0x630 lock_acquire+0xb8/0xe0 _raw_spin_lock_irqsave+0xb4/0x120 rmqueue_bulk+0xac/0x8f0 __rmqueue_pcplist+0x580/0x830 rmqueue_pcplist+0xfc/0x470 rmqueue.isra.0+0xdec/0x11b0 get_page_from_freelist+0x2ee/0xeb0 __alloc_pages_noprof+0x2c2/0x520 alloc_pages_mpol_noprof+0x1fc/0x4d0 alloc_pages_noprof+0x8c/0xe0 allocate_slab+0x320/0x460 ___slab_alloc+0xa58/0x12b0 __slab_alloc.isra.0+0x42/0x60 kmem_cache_alloc_noprof+0x304/0x350 fill_pool+0xf6/0x450 debug_object_activate+0xfe/0x360 enqueue_hrtimer+0x34/0x190 __run_hrtimer+0x3c8/0x4c0 __hrtimer_run_queues+0x1b2/0x260 hrtimer_interrupt+0x316/0x760 do_IRQ+0x9a/0xe0 do_irq_async+0xf6/0x160 Normally a raw_spinlock to spinlock dependency is not legitimate and will be warned if CONFIG_PROVE_RAW_LOCK_NESTING is enabled, but debug_objects_fill_pool() is an exception as it explicitly allows this dependency for non-PREEMPT_RT kernel without causing PROVE_RAW_LOCK_NESTING lockdep splat. As a result, this dependency is legitimate and not a bug. Anyway, semaphore is the only locking primitive left that is still using try_to_wake_up() to do wakeup inside critical section, all the other locking primitives had been migrated to use wake_q to do wakeup outside of the critical section. It is also possible that there are other circular locking dependencies involving printk/console_sem or other existing/new semaphores lurking somewhere which may show up in the future. Let just do the migration now to wake_q to avoid headache like this. 
Reported-by: yzbot+ed801a886dfdbfe7136d@syzkaller.appspotmail.com Signed-off-by: Waiman Long Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250307232717.1759087-3-boqun.feng@gmail.com --- kernel/locking/semaphore.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 34bfae72f2952..de9117c0e671e 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long timeout); -static noinline void __up(struct semaphore *sem); +static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q); /** * down - acquire the semaphore @@ -183,13 +184,16 @@ EXPORT_SYMBOL(down_timeout); void __sched up(struct semaphore *sem) { unsigned long flags; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(list_empty(&sem->wait_list))) sem->count++; else - __up(sem); + __up(sem, &wake_q); raw_spin_unlock_irqrestore(&sem->lock, flags); + if (!wake_q_empty(&wake_q)) + wake_up_q(&wake_q); } EXPORT_SYMBOL(up); @@ -269,11 +273,12 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout) return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout); } -static noinline void __sched __up(struct semaphore *sem) +static noinline void __sched __up(struct semaphore *sem, + struct wake_q_head *wake_q) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); waiter->up = true; - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); } From f3600c867c99a2cc8038680ecf211089c50e7971 
Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 6 Mar 2025 21:55:20 +0000 Subject: [PATCH 359/503] netmem: prevent TX of unreadable skbs Currently on stable trees we have support for netmem/devmem RX but not TX. It is not safe to forward/redirect an RX unreadable netmem packet into the device's TX path, as the device may call dma-mapping APIs on dma addrs that should not be passed to it. Fix this by preventing the xmit of unreadable skbs. Tested by configuring tc redirect: sudo tc qdisc add dev eth1 ingress sudo tc filter add dev eth1 ingress protocol ip prio 1 flower ip_proto \ tcp src_ip 192.168.1.12 action mirred egress redirect dev eth1 Before, I see unreadable skbs in the driver's TX path passed to dma mapping APIs. After, I don't see unreadable skbs in the driver's TX path passed to dma mapping APIs. Fixes: 65249feb6b3d ("net: add support for skbs with unreadable frags") Suggested-by: Jakub Kicinski Cc: stable@vger.kernel.org Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20250306215520.1415465-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 30da277c5a6f8..2f7f5fd9ffec7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3872,6 +3872,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; + if (!skb_frags_readable(skb)) + goto out_kfree_skb; + features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) From d749d901b2168389f060b654fdaa08acf6b367d2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 6 Mar 2025 23:25:29 +0200 Subject: [PATCH 360/503] net/mlx5: Fill out devlink dev info only for PFs Firmware version query is supported on the PFs. 
Due to this, the following kernel warning log is observed: [ 188.590344] mlx5_core 0000:08:00.2: mlx5_fw_version_query:816:(pid 1453): fw query isn't supported by the FW Fix it by restricting the query and devlink info to the PF. Fixes: 8338d9378895 ("net/mlx5: Added devlink info callback") Signed-off-by: Jiri Pirko Reviewed-by: Kalesh AP Signed-off-by: Tariq Toukan Reviewed-by: Parav Pandit Link: https://patch.msgid.link/20250306212529.429329-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 98d4306929f3e..a2cf3e79693dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; From dc5340c3133a3ebe54853fd299116149e528cfaa Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Thu, 6 Mar 2025 12:23:05 -0500 Subject: [PATCH 361/503] net: dsa: mv88e6xxx: Verify after ATU Load ops ATU Load operations could fail silently if there's not enough space on the device to hold the new entry. When this happens, the symptom depends on the unknown flood settings. If unknown multicast flood is disabled, the multicast packets are dropped when the ATU table is full. If unknown multicast flood is enabled, the multicast packets will be flooded to all ports. Either way, IGMP snooping is broken when the ATU Load operation fails silently. Do a Read-After-Write verification after each fdb/mdb add operation to make sure that the operation was really successful, and return -ENOSPC otherwise. 
Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext") Signed-off-by: Joseph Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 68d1e891752b8..5db96ca52505a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2208,13 +2208,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return err; } -static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid, + u16 *fid, struct mv88e6xxx_atu_entry *entry) { - struct mv88e6xxx_atu_entry entry; struct mv88e6xxx_vtu_entry vlan; - u16 fid; int err; /* Ports have two private address databases: one for when the port is @@ -2225,7 +2223,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, * VLAN ID into the port's database used for VLAN-unaware bridging. 
*/ if (vid == 0) { - fid = MV88E6XXX_FID_BRIDGED; + *fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2235,14 +2233,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!vlan.valid) return -EOPNOTSUPP; - fid = vlan.fid; + *fid = vlan.fid; } - entry.state = 0; - ether_addr_copy(entry.mac, addr); - eth_addr_dec(entry.mac); + entry->state = 0; + ether_addr_copy(entry->mac, addr); + eth_addr_dec(entry->mac); + + return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); +} + +static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; - err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return false; + + return entry.state && ether_addr_equal(entry.mac, addr); +} + +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; + + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); if (err) return err; @@ -2846,6 +2869,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, addr, vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; @@ -6614,6 +6644,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; From 26db9c9ee19c36a97dbb1cfef007a3c189c4c874 Mon Sep 17 00:00:00 2001 From: 
Matt Johnston Date: Thu, 6 Mar 2025 18:24:18 +0800 Subject: [PATCH 362/503] net: mctp i3c: Copy headers if cloned Use skb_cow_head() prior to modifying the tx skb. This is necessary when the skb has been cloned, to avoid modifying other shared clones. Signed-off-by: Matt Johnston Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver") Link: https://patch.msgid.link/20250306-matt-i3c-cow-head-v1-1-d5e6a5495227@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i3c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index c1e72253063b5..c678f79aa3561 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -506,10 +506,15 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, const void *saddr, unsigned int len) { struct mctp_i3c_internal_hdr *ihdr; + int rc; if (!daddr || !saddr) return -EINVAL; + rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); skb_reset_mac_header(skb); ihdr = (void *)skb_mac_header(skb); From df8ce77ba8b7c012a3edd1ca7368b46831341466 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 10:33:20 +0800 Subject: [PATCH 363/503] net: mctp i2c: Copy headers if cloned Use skb_cow_head() prior to modifying the TX SKB. This is necessary when the SKB has been cloned, to avoid modifying other shared clones. 
Signed-off-by: Matt Johnston Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Link: https://patch.msgid.link/20250306-matt-mctp-i2c-cow-v1-1-293827212681@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i2c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index e3dcdeacc12c5..d74d47dd6e04d 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -583,6 +583,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, struct mctp_i2c_hdr *hdr; struct mctp_hdr *mhdr; u8 lldst, llsrc; + int rc; if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; @@ -593,6 +594,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); + rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i2c_hdr)); skb_reset_mac_header(skb); hdr = (void *)skb_mac_header(skb); From a07364b394697d2e0baffeb517f41385259aa484 Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Tue, 4 Mar 2025 18:06:13 +0200 Subject: [PATCH 364/503] net: phy: nxp-c45-tja11xx: add TJA112X PHY configuration errata The most recent silicon versions of TJA1120 and TJA1121 can achieve full silicon performance by putting the PHY in managed mode. It is necessary to apply these PHY writes before link gets established. Application of this fix is required after restart of device and wakeup from sleep.
Cc: stable@vger.kernel.org Fixes: f1fe5dff2b8a ("net: phy: nxp-c45-tja11xx: add TJA1120 support") Signed-off-by: Andrei Botila Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250304160619.181046-2-andrei.botila@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 34231b5b91754..709d6c9f7cbae 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -22,6 +22,11 @@ #define PHY_ID_TJA_1103 0x001BB010 #define PHY_ID_TJA_1120 0x001BB031 +#define VEND1_DEVICE_ID3 0x0004 +#define TJA1120_DEV_ID3_SILICON_VERSION GENMASK(15, 12) +#define TJA1120_DEV_ID3_SAMPLE_TYPE GENMASK(11, 8) +#define DEVICE_ID3_SAMPLE_TYPE_R 0x9 + #define VEND1_DEVICE_CONTROL 0x0040 #define DEVICE_CONTROL_RESET BIT(15) #define DEVICE_CONTROL_CONFIG_GLOBAL_EN BIT(14) @@ -1593,6 +1598,50 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev) return 0; } +/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 */ +static void nxp_c45_tja1120_errata(struct phy_device *phydev) +{ + int silicon_version, sample_type; + bool macsec_ability; + int phy_abilities; + int ret = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_ID3); + if (ret < 0) + return; + + sample_type = FIELD_GET(TJA1120_DEV_ID3_SAMPLE_TYPE, ret); + if (sample_type != DEVICE_ID3_SAMPLE_TYPE_R) + return; + + silicon_version = FIELD_GET(TJA1120_DEV_ID3_SILICON_VERSION, ret); + + phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_ABILITIES); + macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + if ((!macsec_ability && silicon_version == 2) || + (macsec_ability && silicon_version == 1)) { + /* TJA1120/TJA1121 PHY configuration errata workaround. + * Apply PHY writes sequence before link up. 
+ */ + if (!macsec_ability) { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x4b95); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0xf3cd); + } else { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x89c7); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0893); + } + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x0476, 0x58a0); + + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x8921, 0xa3a); + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x89F1, 0x16c1); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0); + } +} + static int nxp_c45_config_init(struct phy_device *phydev) { int ret; @@ -1609,6 +1658,9 @@ static int nxp_c45_config_init(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 1); phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 2); + if (phy_id_compare(phydev->phy_id, PHY_ID_TJA_1120, GENMASK(31, 4))) + nxp_c45_tja1120_errata(phydev); + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG, PHY_CONFIG_AUTO); From 48939523843e4813e78920f54937944a8787134b Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Tue, 4 Mar 2025 18:06:14 +0200 Subject: [PATCH 365/503] net: phy: nxp-c45-tja11xx: add TJA112XB SGMII PCS restart errata TJA1120B/TJA1121B can achieve a stable operation of SGMII after a startup event by putting the SGMII PCS into power down mode and restart afterwards. It is necessary to put the SGMII PCS into power down mode and back up. 
Cc: stable@vger.kernel.org Fixes: f1fe5dff2b8a ("net: phy: nxp-c45-tja11xx: add TJA1120 support") Signed-off-by: Andrei Botila Link: https://patch.msgid.link/20250304160619.181046-3-andrei.botila@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 709d6c9f7cbae..e9fc54517449c 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -114,6 +114,9 @@ #define MII_BASIC_CONFIG_RMII 0x5 #define MII_BASIC_CONFIG_MII 0x4 +#define VEND1_SGMII_BASIC_CONTROL 0xB000 +#define SGMII_LPM BIT(11) + #define VEND1_SYMBOL_ERROR_CNT_XTD 0x8351 #define EXTENDED_CNT_EN BIT(15) #define VEND1_MONITOR_STATUS 0xAC80 @@ -1598,11 +1601,11 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev) return 0; } -/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 */ +/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 & 3.2 */ static void nxp_c45_tja1120_errata(struct phy_device *phydev) { + bool macsec_ability, sgmii_ability; int silicon_version, sample_type; - bool macsec_ability; int phy_abilities; int ret = 0; @@ -1619,6 +1622,7 @@ static void nxp_c45_tja1120_errata(struct phy_device *phydev) phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_ABILITIES); macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + sgmii_ability = !!(phy_abilities & SGMII_ABILITY); if ((!macsec_ability && silicon_version == 2) || (macsec_ability && silicon_version == 1)) { /* TJA1120/TJA1121 PHY configuration errata workaround. @@ -1639,6 +1643,18 @@ static void nxp_c45_tja1120_errata(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0); phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0); + + if (sgmii_ability) { + /* TJA1120B/TJA1121B SGMII PCS restart errata workaround. 
+ * Put SGMII PCS into power down mode and back up. + */ + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + } } } From 505ead7ab77f289f12d8a68ac83da068e4d4408b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 05:16:18 -0800 Subject: [PATCH 366/503] netpoll: hold rcu read lock in __netpoll_send_skb() The function __netpoll_send_skb() is being invoked without holding the RCU read lock. This oversight triggers a warning message when CONFIG_PROVE_RCU_LIST is enabled: net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! netpoll_send_skb netpoll_send_udp write_ext_msg console_flush_all console_unlock vprintk_emit To prevent npinfo from disappearing unexpectedly, ensure that __netpoll_send_skb() is protected with the RCU read lock. Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 62b4041aae1ae..0ab722d95a2df 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev) static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. 
*/ @@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return NET_XMIT_DROP; + goto out; } /* don't get messages out of order, and no recursion */ @@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; +out: + rcu_read_unlock(); + return ret; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) From da64a2359092ceec4f9dea5b329d0aef20104217 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 8 Mar 2025 13:50:45 +0800 Subject: [PATCH 367/503] LoongArch: Convert unreachable() to BUG() When compiling on LoongArch, there exists the following objtool warning in arch/loongarch/kernel/machine_kexec.o: kexec_reboot() falls through to next function crash_shutdown_secondary() Avoid using unreachable() as it can (and will in the absence of UBSAN) generate fall-through code. Use BUG() so we get a "break BRK_BUG" trap (with unreachable annotation). 
Cc: stable@vger.kernel.org # 6.12+ Acked-by: Josh Poimboeuf Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 8ae641dc53bb7..f9381800e291c 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -126,14 +126,14 @@ void kexec_reboot(void) /* All secondary cpus go to kexec_smp_wait */ if (smp_processor_id() > 0) { relocated_kexec_smp_wait(NULL); - unreachable(); + BUG(); } #endif do_kexec = (void *)reboot_code_buffer; do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); - unreachable(); + BUG(); } From a0d3c8bcb9206ac207c7ad3182027c6b0a1319bb Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 368/503] LoongArch: Eliminate superfluous get_numa_distances_cnt() In LoongArch, get_numa_distances_cnt() isn't in use, resulting in a compiler warning. Fix follow errors with clang-18 when W=1e: arch/loongarch/kernel/acpi.c:259:28: error: unused function 'get_numa_distances_cnt' [-Werror,-Wunused-function] 259 | static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) | ^~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Link: https://lore.kernel.org/all/Z7bHPVUH4lAezk0E@kernel.org/ Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 382a09a7152c3..1120ac2824f6e 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -249,18 +249,6 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. 
- */ -unsigned int numa_distance_cnt; - -static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) -{ - return slit->locality_count; -} - void __init numa_set_distance(int from, int to, int distance) { if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { From c9117434c8f7523f0b77db4c5766f5011cc94677 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 369/503] LoongArch: Use polling play_dead() when resuming from hibernation When CONFIG_RANDOM_KMALLOC_CACHES or other randomization infrastructure is enabled, the idle_task's stack may differ between the booting kernel and target kernel. So when resuming from hibernation, an ACTION_BOOT_CPU IPI wakes up the idle instruction in arch_cpu_idle_dead() and jumps to the interrupt handler. But since the stack pointer is changed, the interrupt handler cannot restore correct context. So rename the current arch_cpu_idle_dead() to idle_play_dead(), make it the default version of play_dead(), and the new arch_cpu_idle_dead() calls play_dead() directly. For hibernation, implement an arch-specific hibernate_resume_nonboot_cpu_disable() to use the polling version (idle instruction is replaced by nop, and irq is disabled) of play_dead(), i.e. poll_play_dead(), to avoid IPI handler corrupting the idle_task's stack when resuming from hibernation. This solution is a little similar to commit 406f992e4a372dafbe3c ("x86 / hibernate: Use hlt_play_dead() when resuming from hibernation").
Cc: stable@vger.kernel.org Tested-by: Erpeng Xu Tested-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index fbf747447f13f..4b24589c0b565 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void __noreturn arch_cpu_idle_dead(void) +static void __noreturn idle_play_dead(void) { register uint64_t addr; register void (*init_fn)(void); @@ -447,6 +448,50 @@ void __noreturn arch_cpu_idle_dead(void) BUG(); } +#ifdef CONFIG_HIBERNATION +static void __noreturn poll_play_dead(void) +{ + register uint64_t addr; + register void (*init_fn)(void); + + idle_task_exit(); + __this_cpu_write(cpu_state, CPU_DEAD); + + __smp_mb(); + do { + __asm__ __volatile__("nop\n\t"); + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + + init_fn(); + BUG(); +} +#endif + +static void (*play_dead)(void) = idle_play_dead; + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); + BUG(); /* play_dead() doesn't return */ +} + +#ifdef CONFIG_HIBERNATION +int hibernate_resume_nonboot_cpu_disable(void) +{ + int ret; + + play_dead = poll_play_dead; + ret = suspend_disable_secondary_cpus(); + play_dead = idle_play_dead; + + return ret; +} +#endif + #endif /* From c8477bb0a8e7f6b2e47952b403c5cb67a6929e55 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 370/503] LoongArch: Set max_pfn with the PFN of the last page The current max_pfn equals to zero. In this case, it causes user cannot get some page information through /proc filesystem such as kpagecount. 
The following message is displayed by stress-ng test suite with command "stress-ng --verbose --physpage 1 -t 1". # stress-ng --verbose --physpage 1 -t 1 stress-ng: error: [1691] physpage: cannot read page count for address 0x134ac000 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x7ffff207c3a8 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x134b0000 in /proc/kpagecount, errno=22 (Invalid argument) ... After applying this patch, the kernel can pass the test. # stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [1701] physpage: [1701] started (instance 0 on CPU 3) stress-ng: debug: [1701] physpage: [1701] exited (instance 0 on CPU 3) stress-ng: debug: [1700] physpage: [1701] terminated (success) Cc: stable@vger.kernel.org # 6.8+ Fixes: ff6c3d81f2e8 ("NUMA: optimize detection of memory with no node id assigned by firmware") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edcfdfcad7d22..90cb3ca96f085 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -387,6 +387,9 @@ static void __init check_kernel_sections_mem(void) */ static void __init arch_mem_init(char **cmdline_p) { + /* Recalculate max_low_pfn for "mem=xxx" */ + max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + if (usermem) pr_info("User-defined physical RAM map overwrite\n"); From 3109d5ff484b7bc7b955f166974c6776d91f247b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 371/503] LoongArch: Set hugetlb mmap base address aligned with pmd size With ltp test case "testcases/bin/hugefork02", there is a dmesg error report message such as: kernel BUG at mm/hugetlb.c:5550! 
Oops - BUG[#1]: CPU: 0 UID: 0 PID: 1517 Comm: hugefork02 Not tainted 6.14.0-rc2+ #241 Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 pc 90000000004eaf1c ra 9000000000485538 tp 900000010edbc000 sp 900000010edbf940 a0 900000010edbfb00 a1 9000000108d20280 a2 00007fffe9474000 a3 00007ffff3474000 a4 0000000000000000 a5 0000000000000003 a6 00000000003cadd3 a7 0000000000000000 t0 0000000001ffffff t1 0000000001474000 t2 900000010ecd7900 t3 00007fffe9474000 t4 00007fffe9474000 t5 0000000000000040 t6 900000010edbfb00 t7 0000000000000001 t8 0000000000000005 u0 90000000004849d0 s9 900000010edbfa00 s0 9000000108d20280 s1 00007fffe9474000 s2 0000000002000000 s3 9000000108d20280 s4 9000000002b38b10 s5 900000010edbfb00 s6 00007ffff3474000 s7 0000000000000406 s8 900000010edbfa08 ra: 9000000000485538 unmap_vmas+0x130/0x218 ERA: 90000000004eaf1c __unmap_hugepage_range+0x6f4/0x7d0 PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000007 (+FPE +SXE +ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) Process hugefork02 (pid: 1517, threadinfo=00000000a670eaf4, task=000000007a95fc64) Call Trace: [<90000000004eaf1c>] __unmap_hugepage_range+0x6f4/0x7d0 [<9000000000485534>] unmap_vmas+0x12c/0x218 [<9000000000494068>] exit_mmap+0xe0/0x308 [<900000000025fdc4>] mmput+0x74/0x180 [<900000000026a284>] do_exit+0x294/0x898 [<900000000026aa30>] do_group_exit+0x30/0x98 [<900000000027bed4>] get_signal+0x83c/0x868 [<90000000002457b4>] arch_do_signal_or_restart+0x54/0xfa0 [<90000000015795e8>] irqentry_exit_to_user_mode+0xb8/0x138 [<90000000002572d0>] tlb_do_page_fault_1+0x114/0x1b4 The problem is that base address allocated from hugetlbfs is not aligned with pmd size. Here add a checking for hugetlbfs and align base address with pmd size. After this patch the test case "testcases/bin/hugefork02" passes to run. 
This is similar to the commit 7f24cbc9c4d42db8a3c8484d1 ("mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb mappings"). Cc: stable@vger.kernel.org # 6.13+ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 914e82ff3f656..1df9e99582cc6 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -63,8 +64,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp && is_file_hugepages(filp)) + info.align_mask = huge_page_mask_align(filp); + else + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; if (dir == DOWN) { info.flags = VM_UNMAPPED_AREA_TOPDOWN; From 6fb1867d5a44b0a061cf39d2492d23d314bcb8ce Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:59 +0800 Subject: [PATCH 372/503] LoongArch: KVM: Add interrupt checking for AVEC There is a newly added macro INT_AVEC with CSR ESTAT register, which is bit 14 used for LoongArch AVEC support. AVEC interrupt status bit 14 is supported with macro CSR_ESTAT_IS, so here replace the hard-coded value 0x1fff with macro CSR_ESTAT_IS so that the AVEC interrupt status is also supported by KVM. 
Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 20f941af3e9ea..9e1a9b4aa4c6a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; From 78d7bc5a02e1468df53896df354fa80727f35b7d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:01 +0800 Subject: [PATCH 373/503] LoongArch: KVM: Reload guest CSR registers after sleep On host, the HW guest CSR registers are lost after suspend and resume operation. Since last_vcpu of boot CPU still records latest vCPU pointer so that the guest CSR register skips to reload when boot CPU resumes and vCPU is scheduled. Here last_vcpu is cleared so that guest CSR registers will reload from scheduled vCPU context after suspend and resume. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index f6d3242b9234a..b6864d6e5ec8d 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -317,6 +317,13 @@ int kvm_arch_enable_virtualization_cpu(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. 
+ */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } From 6bdbb73dc8d99fbb77f5db79dbb6f108708090b4 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:04 +0800 Subject: [PATCH 374/503] LoongArch: KVM: Fix GPA size issue about VM Physical address space is 48 bit on Loongson-3A5000 physical machine, however it is 47 bit for VM on Loongson-3A5000 system. Size of physical address space of VM is the same with the size of virtual user space (a half) of physical machine. Variable cpu_vabits represents user address space, kernel address space is not included (user space and kernel space are both a half of total). Here cpu_vabits, rather than cpu_vabits - 1, is to represent the size of guest physical address space. Also there is strict checking about page fault GPA address, inject error if it is larger than maximum GPA address of VM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 6 ++++++ arch/loongarch/kvm/vm.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c1e8ec5b941b2..ea321403644ad 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -669,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index b8b3e1972d6ea..edccfc8c9cd80 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -48,7 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (kvm_pvtime_supported()) kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); - 
kvm->arch.gpa_size = BIT(cpu_vabits - 1); + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. + */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table; From 0704a15b930cf97073ce091a0cd7ad32f2304329 Mon Sep 17 00:00:00 2001 From: Thomas Mizrahi Date: Sat, 8 Mar 2025 01:06:28 -0300 Subject: [PATCH 375/503] ASoC: amd: yc: Support mic on another Lenovo ThinkPad E16 Gen 2 model The internal microphone on the Lenovo ThinkPad E16 model requires a quirk entry to work properly. This was fixed in a previous patch (linked below), but depending on the specific variant of the model, the product name may be "21M5" or "21M6". The following patch fixed this issue for the 21M5 variant: https://lore.kernel.org/all/20240725065442.9293-1-tiwai@suse.de/ This patch adds support for the microphone on the 21M6 variant. 
Link: https://github.com/ramaureirac/thinkpad-e14-linux/issues/31 Cc: stable@vger.kernel.org Signed-off-by: Thomas Mizrahi Link: https://patch.msgid.link/20250308041303.198765-1-thomasmizra@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index b16587d8f97a8..a7637056972aa 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21M5"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M6"), + } + }, { .driver_data = &acp6x_card, .matches = { From 058a6bec37c6c3b826158f6d26b75de43816a880 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 7 Mar 2025 23:02:56 +0100 Subject: [PATCH 376/503] x86/microcode/AMD: Add some forgotten models to the SHA check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some more forgotten models to the SHA check. 
Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Reported-by: Toralf Förster Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Tested-by: Toralf Förster Link: https://lore.kernel.org/r/20250307220256.11816-1-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 95ac1c6a84fbe..c69b1bc454834 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -175,23 +175,29 @@ static bool need_sha_check(u32 cur_rev) { switch (cur_rev >> 8) { case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; case 0x86081: return cur_rev <= 0x8608108; break; case 0x87010: return cur_rev <= 0x8701034; break; case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0010: return cur_rev <= 0xa00107a; break; case 0xa0011: return cur_rev <= 0xa0011da; break; case 0xa0012: return cur_rev <= 0xa001243; break; + case 0xa0082: return cur_rev <= 0xa00820e; break; case 0xa1011: return cur_rev <= 0xa101153; break; case 0xa1012: return cur_rev <= 0xa10124e; break; case 0xa1081: return cur_rev <= 0xa108109; break; case 0xa2010: return cur_rev <= 0xa20102f; break; case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa4041: return cur_rev <= 0xa404109; break; + case 0xa5000: return cur_rev <= 0xa500013; break; case 0xa6012: return cur_rev <= 0xa60120a; break; case 0xa7041: return cur_rev <= 0xa704109; break; case 0xa7052: return cur_rev <= 0xa705208; break; case 0xa7080: return cur_rev <= 0xa708009; break; case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; default: break; } From 80e54e84911a923c40d7bee33a34c1b4be148d7a Mon Sep 17 00:00:00 
2001 From: Linus Torvalds Date: Sun, 9 Mar 2025 13:45:25 -1000 Subject: [PATCH 377/503] Linux 6.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6a8e5be6b0043..1d6a9ec8a2ace 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* From aed709355fd05ef747e1af24a1d5d78cd7feb81e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 10 Feb 2025 11:34:41 -0800 Subject: [PATCH 378/503] drm/hyperv: Fix address space leak when Hyper-V DRM device is removed When a Hyper-V DRM device is probed, the driver allocates MMIO space for the vram, and maps it cacheable. If the device is removed, or in the error path for device probing, the MMIO space is released but no unmap is done. Consequently the kernel address space for the mapping is leaked. Fix this by adding iounmap() calls in the device removal path, and in the error path during device probing.
Fixes: f1f63cbb705d ("drm/hyperv: Fix an error handling path in hyperv_vmbus_probe()") Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Tested-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250210193441.2414-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250210193441.2414-1-mhklinux@outlook.com> --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index f59abfa7622ac..0d49f168a919d 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, return 0; err_free_mmio: + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); err_vmbus_close: vmbus_close(hdev->channel); @@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); } From 304386373007aaca9236a3f36afac0bbedcd2bf0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 18 Feb 2025 15:01:30 -0800 Subject: [PATCH 379/503] fbdev: hyperv_fb: Fix hang in kdump kernel when on Hyper-V Gen 2 VMs Gen 2 Hyper-V VMs boot via EFI and have a standard EFI framebuffer device. When the kdump kernel runs in such a VM, loading the efifb driver may hang because of accessing the framebuffer at the wrong memory address. The scenario occurs when the hyperv_fb driver in the original kernel moves the framebuffer to a different MMIO address because of conflicts with an already-running efifb or simplefb driver. The hyperv_fb driver then informs Hyper-V of the change, which is allowed by the Hyper-V FB VMBus device protocol. 
However, when the kexec command loads the kdump kernel into crash memory via the kexec_file_load() system call, the system call doesn't know the framebuffer has moved, and it sets up the kdump screen_info using the original framebuffer address. The transition to the kdump kernel does not go through the Hyper-V host, so Hyper-V does not reset the framebuffer address like it would do on a reboot. When efifb tries to run, it accesses a non-existent framebuffer address, which traps to the Hyper-V host. After many such accesses, the Hyper-V host thinks the guest is being malicious, and throttles the guest to the point that it runs very slowly or appears to have hung. When the kdump kernel is loaded into crash memory via the kexec_load() system call, the problem does not occur. In this case, the kexec command builds the screen_info table itself in user space from data returned by the FBIOGET_FSCREENINFO ioctl against /dev/fb0, which gives it the new framebuffer location. This problem was originally reported in 2020 [1], resulting in commit 3cb73bc3fa2a ("hyperv_fb: Update screen_info after removing old framebuffer"). This commit solved the problem by setting orig_video_isVGA to 0, so the kdump kernel was unaware of the EFI framebuffer. The efifb driver did not try to load, and no hang occurred. But in 2024, commit c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") effectively reverted 3cb73bc3fa2a. Commit c25a19afb81c has no reference to 3cb73bc3fa2a, so perhaps it was done without knowing the implications that were reported with 3cb73bc3fa2a. In any case, as of commit c25a19afb81c, the original problem came back again. Interestingly, the hyperv_drm driver does not have this problem because it never moves the framebuffer. The difference is that the hyperv_drm driver removes any conflicting framebuffers *before* allocating an MMIO address, while the hyperv_fb driver removes conflicting framebuffers *after* allocating an MMIO address.
With the "after" ordering, hyperv_fb may encounter a conflict and move the framebuffer to a different MMIO address. But the conflict is essentially bogus because it is removed a few lines of code later. Rather than fix the problem with the approach from 2020 in commit 3cb73bc3fa2a, instead slightly reorder the steps in hyperv_fb so conflicting framebuffers are removed before allocating an MMIO address. Then the default framebuffer MMIO address should always be available, and there's never any confusion about which framebuffer address the kdump kernel should use -- it's always the original address provided by the Hyper-V host. This approach is already used by the hyperv_drm driver, and is consistent with the usage guidelines at the head of the module with the function aperture_remove_conflicting_devices(). This approach also solves a related minor problem when kexec_load() is used to load the kdump kernel. With current code, unbinding and rebinding the hyperv_fb driver could result in the framebuffer moving back to the default framebuffer address, because on the rebind there are no conflicts. If such a move is done after the kdump kernel is loaded with the new framebuffer address, at kdump time it could again have the wrong address. This problem and fix are described in terms of the kdump kernel, but it can also occur with any kernel started via kexec. See extensive discussion of the problem and solution at [2]. 
[1] https://lore.kernel.org/linux-hyperv/20201014092429.1415040-1-kasong@redhat.com/ [2] https://lore.kernel.org/linux-hyperv/BLAPR10MB521793485093FDB448F7B2E5FDE92@BLAPR10MB5217.namprd10.prod.outlook.com/ Reported-by: Thomas Tai Fixes: c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20250218230130.3207-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250218230130.3207-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 363e4ccfcdb77..ce23d0ef5702a 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -989,6 +989,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) base = pci_resource_start(pdev, 0); size = pci_resource_len(pdev, 0); + aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); /* * For Gen 1 VM, we can directly use the contiguous memory @@ -1010,11 +1011,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); + } else { + aperture_remove_all_conflicting_devices(KBUILD_MODNAME); } /* - * Cannot use the contiguous physical memory. - * Allocate mmio space for framebuffer. + * Cannot use contiguous physical memory, so allocate MMIO space for + * the framebuffer. At this point in the function, conflicting devices + * that might have claimed the framebuffer MMIO space based on + * screen_info.lfb_base must have already been removed so that + * vmbus_allocate_mmio() does not allocate different MMIO space. If the + * kdump image were to be loaded using kexec_file_load(), the + * framebuffer location in the kdump image would be set from + * screen_info.lfb_base at the time that kdump is enabled. 
If the + * framebuffer has moved elsewhere, this could be the wrong location, + * causing kdump to hang when efifb (for example) loads. */ dio_fb_size = screen_width * screen_height * screen_depth / 8; @@ -1051,11 +1062,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) info->screen_size = dio_fb_size; getmem_done: - if (base && size) - aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); - else - aperture_remove_all_conflicting_devices(KBUILD_MODNAME); - if (!gen2vm) pci_dev_put(pdev); From f5e728a50bb17336a20803dde488515b833ecd1d Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 1 Mar 2025 08:16:30 -0800 Subject: [PATCH 380/503] fbdev: hyperv_fb: Simplify hvfb_putmem The device object required in 'hvfb_release_phymem' function for 'dma_free_coherent' can also be obtained from the 'info' pointer, making 'hdev' parameter in 'hvfb_putmem' redundant. Remove the unnecessary 'hdev' argument from 'hvfb_putmem'. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com> --- drivers/video/fbdev/hyperv_fb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index ce23d0ef5702a..9798a34ac571f 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -952,7 +952,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, } /* Release contiguous physical memory */ -static void hvfb_release_phymem(struct hv_device *hdev, +static void hvfb_release_phymem(struct device *device, phys_addr_t paddr, unsigned int size) { unsigned int order = get_order(size); @@ -960,7 +960,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, if (order <= MAX_PAGE_ORDER) __free_pages(pfn_to_page(paddr >> 
PAGE_SHIFT), order); else - dma_free_coherent(&hdev->device, + dma_free_coherent(device, round_up(size, PAGE_SIZE), phys_to_virt(paddr), paddr); @@ -1080,7 +1080,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) } /* Release the framebuffer */ -static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) +static void hvfb_putmem(struct fb_info *info) { struct hvfb_par *par = info->par; @@ -1089,7 +1089,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { - hvfb_release_phymem(hdev, info->fix.smem_start, + hvfb_release_phymem(info->device, info->fix.smem_start, screen_fb_size); } @@ -1203,7 +1203,7 @@ static int hvfb_probe(struct hv_device *hdev, error: fb_deferred_io_cleanup(info); - hvfb_putmem(hdev, info); + hvfb_putmem(info); error2: vmbus_close(hdev->channel); error1: @@ -1232,7 +1232,7 @@ static void hvfb_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - hvfb_putmem(hdev, info); + hvfb_putmem(info); framebuffer_release(info); } From ea2f45ab0e53b255f72c85ccd99e2b394fc5fceb Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 1 Mar 2025 08:16:31 -0800 Subject: [PATCH 381/503] fbdev: hyperv_fb: Allow graceful removal of framebuffer When a Hyper-V framebuffer device is unbind, hyperv_fb driver tries to release the framebuffer forcefully. If this framebuffer is in use it produce the following WARN and hence this framebuffer is never released. [ 44.111220] WARNING: CPU: 35 PID: 1882 at drivers/video/fbdev/core/fb_info.c:70 framebuffer_release+0x2c/0x40 < snip > [ 44.111289] Call Trace: [ 44.111290] [ 44.111291] ? show_regs+0x6c/0x80 [ 44.111295] ? __warn+0x8d/0x150 [ 44.111298] ? framebuffer_release+0x2c/0x40 [ 44.111300] ? report_bug+0x182/0x1b0 [ 44.111303] ? handle_bug+0x6e/0xb0 [ 44.111306] ? exc_invalid_op+0x18/0x80 [ 44.111308] ? asm_exc_invalid_op+0x1b/0x20 [ 44.111311] ? 
framebuffer_release+0x2c/0x40 [ 44.111313] ? hvfb_remove+0x86/0xa0 [hyperv_fb] [ 44.111315] vmbus_remove+0x24/0x40 [hv_vmbus] [ 44.111323] device_remove+0x40/0x80 [ 44.111325] device_release_driver_internal+0x20b/0x270 [ 44.111327] ? bus_find_device+0xb3/0xf0 Fix this by moving the release of framebuffer and assosiated memory to fb_ops.fb_destroy function, so that framebuffer framework handles it gracefully. While we fix this, also replace manual registrations/unregistration of framebuffer with devm_register_framebuffer. Fixes: 68a2d20b79b1 ("drivers/video: add Hyper-V Synthetic Video Frame Buffer Driver") Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com> --- drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 9798a34ac571f..75338ffc703fb 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -282,6 +282,8 @@ static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ +static void hvfb_putmem(struct fb_info *info); + /* Send message to Hyper-V host */ static inline int synthvid_send(struct hv_device *hdev, struct synthvid_msg *msg) @@ -862,6 +864,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, hvfb_ondemand_refresh_throttle(par, x, y, width, height); } +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup related to + * framebuffer here. 
+ */ +static void hvfb_destroy(struct fb_info *info) +{ + hvfb_putmem(info); + framebuffer_release(info); +} + /* * TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the * driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases. @@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = { .fb_set_par = hvfb_set_par, .fb_setcolreg = hvfb_setcolreg, .fb_blank = hvfb_blank, + .fb_destroy = hvfb_destroy, }; /* Get options from kernel paramenter "video=" */ @@ -1178,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev, if (ret) goto error; - ret = register_framebuffer(info); + ret = devm_register_framebuffer(&hdev->device, info); if (ret) { pr_err("Unable to register framebuffer\n"); goto error; @@ -1226,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev) fb_deferred_io_cleanup(info); - unregister_framebuffer(info); cancel_delayed_work_sync(&par->dwork); vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - - hvfb_putmem(info); - framebuffer_release(info); } static int hvfb_suspend(struct hv_device *hdev) From 09beefefb57bbc3a06d98f319d85db4d719d7bcb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 26 Feb 2025 12:06:06 -0800 Subject: [PATCH 382/503] x86/hyperv: Fix output argument to hypercall that changes page visibility The hypercall in hv_mark_gpa_visibility() is invoked with an input argument and an output argument. The output argument ostensibly returns the number of pages that were processed. But in fact, the hypercall does not provide any output, so the output argument is spurious. The spurious argument is harmless because Hyper-V ignores it, but in the interest of correctness and to avoid the potential for future problems, remove it. 
Signed-off-by: Michael Kelley Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250226200612.2062-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250226200612.2062-2-mhklinux@outlook.com> --- arch/x86/hyperv/ivm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index dd68d9ad9b22c..ec7880271cf98 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -464,7 +464,6 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { struct hv_gpa_range_for_visibility *input; - u16 pages_processed; u64 hv_status; unsigned long flags; @@ -493,7 +492,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); hv_status = hv_do_rep_hypercall( HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, - 0, input, &pages_processed); + 0, input, NULL); local_irq_restore(flags); if (hv_result_success(hv_status)) From a2add513311b48cc924a699a8174db2c61ed5e8a Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 9 Mar 2025 20:29:59 -0500 Subject: [PATCH 383/503] Input: iqs7222 - preserve system status register Some register groups reserve a byte at the end of their continuous address space. Depending on the variant of silicon, this field may share the same memory space as the lower byte of the system status register (0x10). In these cases, caching the reserved byte and writing it later may effectively reset the device depending on what happened in between the read and write operations. Solve this problem by avoiding any access to this last byte within offending register groups. This method replaces a workaround which attempted to write the reserved byte with up-to-date contents, but left a small window in which updates by the device could have been clobbered. 
Now that the driver does not touch these reserved bytes, the order in which the device's registers are written no longer matters, and they can be written in their natural order. The new method is also much more generic, and can be more easily extended to new variants of silicon with different register maps. As part of this change, the register read and write functions must be gently updated to support byte access instead of word access. Fixes: 2e70ef525b73 ("Input: iqs7222 - acknowledge reset before writing registers") Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/Z85Alw+d9EHKXx2e@nixie71 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs7222.c | 50 ++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 22022d11470db..80b917944b51e 100644 --- a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -100,11 +100,11 @@ enum iqs7222_reg_key_id { enum iqs7222_reg_grp_id { IQS7222_REG_GRP_STAT, - IQS7222_REG_GRP_FILT, IQS7222_REG_GRP_CYCLE, IQS7222_REG_GRP_GLBL, IQS7222_REG_GRP_BTN, IQS7222_REG_GRP_CHAN, + IQS7222_REG_GRP_FILT, IQS7222_REG_GRP_SLDR, IQS7222_REG_GRP_TPAD, IQS7222_REG_GRP_GPIO, @@ -286,6 +286,7 @@ static const struct iqs7222_event_desc iqs7222_tp_events[] = { struct iqs7222_reg_grp_desc { u16 base; + u16 val_len; int num_row; int num_col; }; @@ -342,6 +343,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAC00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -400,6 +402,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAC00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -454,6 +457,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xC400, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -496,6 +500,7 @@ static const struct 
iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xC400, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -543,6 +548,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAA00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -600,6 +606,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAA00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -656,6 +663,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -712,6 +720,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -768,6 +777,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -1604,7 +1614,7 @@ static int iqs7222_force_comms(struct iqs7222_private *iqs7222) } static int iqs7222_read_burst(struct iqs7222_private *iqs7222, - u16 reg, void *val, u16 num_val) + u16 reg, void *val, u16 val_len) { u8 reg_buf[sizeof(__be16)]; int ret, i; @@ -1619,7 +1629,7 @@ static int iqs7222_read_burst(struct iqs7222_private *iqs7222, { .addr = client->addr, .flags = I2C_M_RD, - .len = num_val * sizeof(__le16), + .len = val_len, .buf = (u8 *)val, }, }; @@ -1675,7 +1685,7 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val) __le16 val_buf; int error; - error = iqs7222_read_burst(iqs7222, reg, &val_buf, 1); + error = iqs7222_read_burst(iqs7222, reg, &val_buf, sizeof(val_buf)); if (error) return error; @@ -1685,10 +1695,9 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val) } static int iqs7222_write_burst(struct iqs7222_private *iqs7222, - u16 reg, const void *val, u16 num_val) + u16 reg, const void *val, u16 val_len) { int reg_len = reg > 
U8_MAX ? sizeof(reg) : sizeof(u8); - int val_len = num_val * sizeof(__le16); int msg_len = reg_len + val_len; int ret, i; struct i2c_client *client = iqs7222->client; @@ -1747,7 +1756,7 @@ static int iqs7222_write_word(struct iqs7222_private *iqs7222, u16 reg, u16 val) { __le16 val_buf = cpu_to_le16(val); - return iqs7222_write_burst(iqs7222, reg, &val_buf, 1); + return iqs7222_write_burst(iqs7222, reg, &val_buf, sizeof(val_buf)); } static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222) @@ -1831,30 +1840,14 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) /* * Acknowledge reset before writing any registers in case the device - * suffers a spurious reset during initialization. Because this step - * may change the reserved fields of the second filter beta register, - * its cache must be updated. - * - * Writing the second filter beta register, in turn, may clobber the - * system status register. As such, the filter beta register pair is - * written first to protect against this hazard. + * suffers a spurious reset during initialization. */ if (dir == WRITE) { - u16 reg = dev_desc->reg_grps[IQS7222_REG_GRP_FILT].base + 1; - u16 filt_setup; - error = iqs7222_write_word(iqs7222, IQS7222_SYS_SETUP, iqs7222->sys_setup[0] | IQS7222_SYS_SETUP_ACK_RESET); if (error) return error; - - error = iqs7222_read_word(iqs7222, reg, &filt_setup); - if (error) - return error; - - iqs7222->filt_setup[1] &= GENMASK(7, 0); - iqs7222->filt_setup[1] |= (filt_setup & ~GENMASK(7, 0)); } /* @@ -1883,6 +1876,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) int num_col = dev_desc->reg_grps[i].num_col; u16 reg = dev_desc->reg_grps[i].base; __le16 *val_buf; + u16 val_len = dev_desc->reg_grps[i].val_len ? 
: num_col * sizeof(*val_buf); u16 *val; if (!num_col) @@ -1900,7 +1894,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) switch (dir) { case READ: error = iqs7222_read_burst(iqs7222, reg, - val_buf, num_col); + val_buf, val_len); for (k = 0; k < num_col; k++) val[k] = le16_to_cpu(val_buf[k]); break; @@ -1909,7 +1903,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) for (k = 0; k < num_col; k++) val_buf[k] = cpu_to_le16(val[k]); error = iqs7222_write_burst(iqs7222, reg, - val_buf, num_col); + val_buf, val_len); break; default: @@ -1962,7 +1956,7 @@ static int iqs7222_dev_info(struct iqs7222_private *iqs7222) int error, i; error = iqs7222_read_burst(iqs7222, IQS7222_PROD_NUM, dev_id, - ARRAY_SIZE(dev_id)); + sizeof(dev_id)); if (error) return error; @@ -2915,7 +2909,7 @@ static int iqs7222_report(struct iqs7222_private *iqs7222) __le16 status[IQS7222_MAX_COLS_STAT]; error = iqs7222_read_burst(iqs7222, IQS7222_SYS_STATUS, status, - num_stat); + num_stat * sizeof(*status)); if (error) return error; From 823869e1e61607ab0d433de3c8abed221dc80a5e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Mar 2025 08:46:57 +0000 Subject: [PATCH 384/503] afs: Fix afs_atcell_get_link() to handle RCU pathwalk The ->get_link() method may be entered under RCU pathwalk conditions (in which case, the dentry pointer is NULL). This is not taken account of by afs_atcell_get_link() and lockdep will complain when it tries to lock an rwsem. Fix this by marking net->ws_cell as __rcu and using RCU access macros on it and by making afs_atcell_get_link() just return a pointer to the name in RCU pathwalk without taking net->cells_lock or a ref on the cell as RCU will protect the name storage (the cell is already freed via call_rcu()). 
Fixes: 30bca65bbbae ("afs: Make /afs/@cell and /afs/.@cell symlinks") Reported-by: Alexander Viro Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250310094206.801057-2-dhowells@redhat.com/ # v4 --- fs/afs/cell.c | 11 ++++++----- fs/afs/dynroot.c | 15 +++++++++++++-- fs/afs/internal.h | 2 +- fs/afs/proc.c | 4 ++-- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index cee42646736c8..96a6781f36530 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -64,7 +64,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENAMETOOLONG); if (!name) { - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, + lockdep_is_held(&net->cells_lock)); if (!cell) return ERR_PTR(-EDESTADDRREQ); goto found; @@ -388,8 +389,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ down_write(&net->cells_lock); afs_see_cell(new_root, afs_cell_trace_see_ws); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_replace_pointer(net->ws_cell, new_root, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); @@ -945,8 +946,8 @@ void afs_cell_purge(struct afs_net *net) _enter(""); down_write(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; + ws = rcu_replace_pointer(net->ws_cell, NULL, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index d8bf52f77d930..008698d706caa 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -314,12 +314,23 @@ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inod const char *name; bool dotted = vnode->fid.vnode == 3; - if (!net->ws_cell) + if (!dentry) { + /* We're in RCU-pathwalk. 
*/ + cell = rcu_dereference(net->ws_cell); + if (dotted) + name = cell->name - 1; + else + name = cell->name; + /* Shouldn't need to set a delayed call. */ + return name; + } + + if (!rcu_access_pointer(net->ws_cell)) return ERR_PTR(-ENOENT); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (dotted) name = cell->name - 1; else diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 90f407774a9a1..df30bd62da79e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -287,7 +287,7 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; + struct afs_cell __rcu *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index e7614f4f30c21..12c88d8be3fe8 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -206,7 +206,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) net = afs_seq2net_single(m); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (cell) seq_printf(m, "%s\n", cell->name); up_read(&net->cells_lock); @@ -242,7 +242,7 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) ret = -EEXIST; inode_lock(file_inode(file)); - if (!net->ws_cell) + if (!rcu_access_pointer(net->ws_cell)) ret = afs_cell_init(net, buf); else printk("busy\n"); From 65be5c95d08eedda570a6c888a12384c77fe7614 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 9 Mar 2025 18:22:16 +0100 Subject: [PATCH 385/503] x86/sgx: Warn explicitly if X86_FEATURE_SGX_LC is not enabled The kernel requires X86_FEATURE_SGX_LC to be able to create SGX enclaves, not just X86_FEATURE_SGX. There is quite a number of hardware which has X86_FEATURE_SGX but not X86_FEATURE_SGX_LC. 
A kernel running on such hardware does not create the /dev/sgx_enclave file and does so silently. Explicitly warn if X86_FEATURE_SGX_LC is not enabled to properly notify users that the kernel disabled the SGX driver. The X86_FEATURE_SGX_LC, a.k.a. SGX Launch Control, is a CPU feature that enables LE (Launch Enclave) hash MSRs to be writable (with additional opt-in required in the 'feature control' MSR) when running enclaves, i.e. using a custom root key rather than the Intel proprietary key for enclave signing. I've hit this issue myself and have spent some time researching where my /dev/sgx_enclave file went on SGX-enabled hardware. Related links: https://github.com/intel/linux-sgx/issues/837 https://patchwork.kernel.org/project/platform-driver-x86/patch/20180827185507.17087-3-jarkko.sakkinen@linux.intel.com/ [ mingo: Made the error message a bit more verbose, and added other cases where the kernel fails to create the /dev/sgx_enclave device node. ] Signed-off-by: Vladis Dronov Signed-off-by: Ingo Molnar Acked-by: Kai Huang Cc: Jarkko Sakkinen Cc: Andy Lutomirski Cc: Sean Christopherson Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250309172215.21777-2-vdronov@redhat.com --- arch/x86/kernel/cpu/sgx/driver.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 22b65a5f5ec6c..7f8d1e11dbee2 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -150,13 +150,15 @@ int __init sgx_drv_init(void) u64 xfrm_mask; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) { + pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; + } cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx); if (!(eax & 1)) { - pr_err("SGX disabled: SGX1 instruction support not available.\n"); + pr_info("SGX disabled: SGX1 instruction 
support not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; } @@ -173,8 +175,10 @@ int __init sgx_drv_init(void) } ret = misc_register(&sgx_dev_enclave); - if (ret) + if (ret) { + pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret); return ret; + } return 0; } From 247fba13416af65b155949bae582d55c310f58b6 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 10 Mar 2025 16:04:40 +0800 Subject: [PATCH 386/503] ASoC: rt722-sdca: add missing readable registers SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, CH_01) ... SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, CH_04) are used by the "FU15 Boost Volume" control, but not marked as readable. And the mbq size are 2 for those registers. Fixes: 7f5d6036ca005 ("ASoC: rt722-sdca: Add RT722 SDCA driver") Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Shuming Fan Link: https://patch.msgid.link/20250310080440.58797-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca-sdw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index 25fc13687bc83..4d3043627bd04 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -86,6 +86,10 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re case 0x6100067: case 0x6100070 ... 0x610007c: case 0x6100080: + case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, + CH_01) ... 
+ SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, + CH_04): case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME, CH_01): case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME, From f3fa0e40df175acd60b71036b9a1fd62310aec03 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 5 Feb 2025 11:24:38 +0800 Subject: [PATCH 387/503] sched/clock: Don't define sched_clock_irqtime as static key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sched_clock_irqtime was defined as a static key in: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key") However, this change introduces a 'sleeping in atomic context' warning: arch/x86/kernel/tsc.c:1214 mark_tsc_unstable() warn: sleeping in atomic context As analyzed by Dan, the affected code path is as follows: vcpu_load() <- disables preempt -> kvm_arch_vcpu_load() -> mark_tsc_unstable() <- sleeps virt/kvm/kvm_main.c 166 void vcpu_load(struct kvm_vcpu *vcpu) 167 { 168 int cpu = get_cpu(); ^^^^^^^^^^ This get_cpu() disables preemption. 169 170 __this_cpu_write(kvm_running_vcpu, vcpu); 171 preempt_notifier_register(&vcpu->preempt_notifier); 172 kvm_arch_vcpu_load(vcpu, cpu); 173 put_cpu(); 174 } arch/x86/kvm/x86.c 4979 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { 4980 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 
0 : 4981 rdtsc() - vcpu->arch.last_host_tsc; 4982 if (tsc_delta < 0) 4983 mark_tsc_unstable("KVM discovered backwards TSC"); arch/x86/kernel/tsc.c 1206 void mark_tsc_unstable(char *reason) 1207 { 1208 if (tsc_unstable) 1209 return; 1210 1211 tsc_unstable = 1; 1212 if (using_native_sched_clock()) 1213 clear_sched_clock_stable(); --> 1214 disable_sched_clock_irqtime(); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ kernel/jump_label.c 245 void static_key_disable(struct static_key *key) 246 { 247 cpus_read_lock(); ^^^^^^^^^^^^^^^^ This lock has a might_sleep() in it which triggers the static checker warning. 248 static_key_disable_cpuslocked(key); 249 cpus_read_unlock(); 250 } Let revert this change for now as {disable,enable}_sched_clock_irqtime are used in many places, as pointed out by Sean, including the following: The code path in clocksource_watchdog(): clocksource_watchdog() | -> spin_lock(&watchdog_lock); | -> __clocksource_unstable() | -> clocksource.mark_unstable() == tsc_cs_mark_unstable() | -> disable_sched_clock_irqtime() And the code path in sched_clock_register(): /* Cannot register a sched_clock with interrupts on */ local_irq_save(flags); ... 
/* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) enable_sched_clock_irqtime(); local_irq_restore(flags); [ lkp@intel.com: reported a build error in the prev version ] [ mingo: cherry-picked it over into sched/urgent ] Closes: https://lore.kernel.org/kvm/37a79ba3-9ce0-479c-a5b0-2bd75d573ed3@stanley.mountain/ Fixes: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key") Reported-by: Dan Carpenter Debugged-by: Dan Carpenter Debugged-by: Sean Christopherson Debugged-by: Michal Koutný Signed-off-by: Yafang Shao Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20250205032438.14668-1-laoar.shao@gmail.com --- kernel/sched/cputime.c | 8 ++++---- kernel/sched/sched.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5d9143dd08791..6dab4854c6c08 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -9,8 +9,6 @@ #ifdef CONFIG_IRQ_TIME_ACCOUNTING -DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime); - /* * There are no locks covering percpu hardirq/softirq time. 
* They are only modified in vtime_account, on corresponding CPU @@ -24,14 +22,16 @@ DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime); */ DEFINE_PER_CPU(struct irqtime, cpu_irqtime); +int sched_clock_irqtime; + void enable_sched_clock_irqtime(void) { - static_branch_enable(&sched_clock_irqtime); + sched_clock_irqtime = 1; } void disable_sched_clock_irqtime(void) { - static_branch_disable(&sched_clock_irqtime); + sched_clock_irqtime = 0; } static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c8512a9fb0229..023b844159c94 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3259,11 +3259,11 @@ struct irqtime { }; DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime); +extern int sched_clock_irqtime; static inline int irqtime_enabled(void) { - return static_branch_likely(&sched_clock_irqtime); + return sched_clock_irqtime; } /* From e3e89178a9f4a80092578af3ff3c8478f9187d59 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 10 Mar 2025 15:42:43 +0100 Subject: [PATCH 388/503] x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". 
On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds This does not have any security implications since flashing microcode is a privileged operation but I believe this has reliability implications by potentially corrupting memory while flashing a microcode update. When booting with CONFIG_UBSAN_BOUNDS=y on an AMD machine that flashes a microcode update. I get the following splat: UBSAN: array-index-out-of-bounds in arch/x86/kernel/cpu/microcode/amd.c:X:Y index 512 is out of range for type 'unsigned long[512]' [...] Call Trace: dump_stack __ubsan_handle_out_of_bounds load_microcode_amd request_microcode_amd reload_store kernfs_fop_write_iter vfs_write ksys_write do_syscall_64 entry_SYSCALL_64_after_hwframe Change the loop to go over only NUMA nodes which have CPUs before determining whether the first CPU on the respective node needs microcode update. [ bp: Massage commit message, fix typo. 
] Fixes: 7ff6edf4fef3 ("x86/microcode/AMD: Fix mixed steppings support") Signed-off-by: Florent Revest Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250310144243.861978-1-revest@chromium.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c69b1bc454834..138689b8e1d83 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1074,7 +1074,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz if (ret != UCODE_OK) return ret; - for_each_node(nid) { + for_each_node_with_cpus(nid) { cpu = cpumask_first(cpumask_of_node(nid)); c = &cpu_data(cpu); From 10c7988418d8f759ba70c4a558961e0bfa74647f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 6 Mar 2025 18:42:11 +0530 Subject: [PATCH 389/503] drm/xe: Release guc ids before cancelling work A GT reset can occur in parallel while cancelling work in the async call, which can requeue these workers. To avoid that, let's first release guc ids and then cancel work so they don't get requeued. 
Fixes: 8ae8a2e8dd21 ("drm/xe: Long running job update") Fixes: 12c2f962fe71 ("drm/xe: cancel pending job timer before freeing scheduler") Signed-off-by: Tejas Upadhyay Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250306131211.975503-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8e8d76f62329127b31c64a034b052fb9e30e92af) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b6a2dd742ebdc..1a5fe4822a62e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1246,11 +1246,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); + release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); From 9106713bd2ab0cacd380cda0d3f0219f2e488086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 7 Mar 2025 11:01:09 +0100 Subject: [PATCH 390/503] drm/xe/userptr: Fix an incorrect assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assert incorrectly checks the total length processed which can in fact be greater than the number of pages. Fix. 
Fixes: 0a98219bcc96 ("drm/xe/hmm: Don't dereference struct page pointers without notifier lock") Cc: Matthew Auld Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20250307100109.21397-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 70e5043ba85eae199b232e39921abd706b5c1fa4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 392102515f3d8..c3cc0fa105e84 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -138,13 +138,17 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, i += size; if (unlikely(j == st->nents - 1)) { + xe_assert(xe, i >= npages); if (i > npages) size -= (i - npages); + sg_mark_end(sgl); + } else { + xe_assert(xe, i < npages); } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } - xe_assert(xe, i == npages); return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); From 6266f4a78131c795631440ea9c7b66cdfd399484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 18 Feb 2025 23:18:55 +0200 Subject: [PATCH 391/503] drm/i915/cdclk: Do cdclk post plane programming later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently call intel_set_cdclk_post_plane_update() far too early. When pipes are active during the reprogramming the current spot only works for the cd2x divider update case, as that is synchronized to the pipe's vblank. Squashing and crawling are not synchronized in any way, so doing the programming while the pipes/planes are potentially still using the old hardware state could lead to underruns. Move the post plane reprogramming to a spot where we know that the pipes/planes have switched over to the new hardware state. 
Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250218211913.27867-2-ville.syrjala@linux.intel.com Reviewed-by: Vinod Govindapillai (cherry picked from commit fb64f5568c0e0b5730733d70a012ae26b1a55815) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 41128469f12a2..c9dcf2bbd4c73 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7830,9 +7830,6 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_program_dpkgc_latency(state); - if (state->modeset) - intel_set_cdclk_post_plane_update(state); - intel_wait_for_vblank_workers(state); /* FIXME: We should call drm_atomic_helper_commit_hw_done() here @@ -7906,6 +7903,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_verify_planes(state); intel_sagv_post_plane_update(state); + if (state->modeset) + intel_set_cdclk_post_plane_update(state); intel_pmdemand_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); From 3e331a6715ee26f2fabc59dad6bb36d810707028 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 7 Mar 2025 19:56:35 -0500 Subject: [PATCH 392/503] drm/xe/pm: Temporarily disable D3Cold on BMG Currently, many instability cases related to D3Cold -> D0 transition on BMG are under investigation. Among them some bad cases where the device is lost after 1 to 3 transitions from D3Cold to D0 on the runtime pm, with pcieport upstream bridge port link retrain failure. In other cases, it works fine, but with some sudden random memory corruptions after D3cold, that could be 0xffff missed ack on GT forcewake or GuC reload related failures. In some other cases though, D3Cold -> D0 works pretty reliably. 
It looks like it is a combination of GPU cards and Host boards at this point. So, there is no possible/available quirk at this time. This patch disables the D3Cold by default on BMG by reducing the vram_d3cold_threshold to 0. Users and developers who wants to enable it are still able to via $ echo 300 > /sys/bus/pci/devices//vram_d3cold_threshold Fixes: 3adcf970dc7e ("drm/xe/bmg: Drop force_probe requirement") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4037 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4395 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4396 Cc: Karthik Poosa Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250308005636.1475420-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit d945cc876277851053c0cf37927c8d7bd9d0e880) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c9cc0c091dfdd..89fd2c043136e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -267,6 +267,15 @@ int xe_pm_init_early(struct xe_device *xe) } ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ +static u32 vram_threshold_value(struct xe_device *xe) +{ + /* FIXME: D3Cold temporarily disabled by default on BMG */ + if (xe->info.platform == XE_BATTLEMAGE) + return 0; + + return DEFAULT_VRAM_THRESHOLD; +} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -277,6 +286,7 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ */ int xe_pm_init(struct xe_device *xe) { + u32 vram_threshold; int err; /* For now suspend/resume is only allowed with GuC */ @@ -290,7 +300,8 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; - err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + vram_threshold = vram_threshold_value(xe); + err = 
xe_pm_set_vram_threshold(xe, vram_threshold); if (err) return err; } From c605acb53f449f6289f042790307d7dc9e62d03d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 7 Mar 2025 11:03:07 -0500 Subject: [PATCH 393/503] drm/xe/guc_pc: Retry and wait longer for GuC PC start In a rare situation of thermal limit during resume, GuC can be slow and run into delays like this: xe 0000:00:02.0: [drm] GT1: excessive init time: 667ms! \ [status = 0x8002F034, timeouts = 0] xe 0000:00:02.0: [drm] GT1: excessive init time: \ [freq = 100MHz (req = 800MHz), before = 100MHz, \ perf_limit_reasons = 0x1C001000] xe 0000:00:02.0: [drm] *ERROR* GT1: GuC PC Start failed ------------[ cut here ]------------ xe 0000:00:02.0: [drm] GT1: Failed to start GuC PC: -EIO When this happens, it will block entirely the GPU to be used. So, let's try and with a huge timeout in the hope it comes back. Also, let's collect some information on how long it is usually taking on situations like this, so perhaps the time can be tuned later. 
Cc: Vinay Belgaumkar Cc: Jonathan Cavitt Cc: John Harrison Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20250307160307.1093391-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit b4b05e53b550a886b4754b87fd0dd2b304579e85) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 53 +++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index df7f130fb663f..b995d1d51aed0 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -6,6 +6,7 @@ #include "xe_guc_pc.h" #include +#include #include #include @@ -19,6 +20,7 @@ #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_printk.h" +#include "xe_gt_throttle.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -49,6 +51,9 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ +#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ + /** * DOC: GuC Power Conservation (PC) * @@ -113,9 +118,10 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) static int wait_for_pc_state(struct xe_guc_pc *pc, - enum slpc_global_state state) + enum slpc_global_state state, + int timeout_ms) { - int timeout_us = 5000; /* rought 5ms, but no need for precision */ + int timeout_us = 1000 * timeout_ms; int slept, wait = 10; xe_device_assert_mem_access(pc_to_xe(pc)); @@ -164,7 +170,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; /* Blocking here to ensure the results are ready before reading them */ @@ -187,7 +194,8 @@ static int pc_action_set_param(struct 
xe_guc_pc *pc, u8 id, u32 value) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); @@ -208,7 +216,8 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); @@ -440,6 +449,15 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) return freq; } +static u32 get_cur_freq(struct xe_gt *gt) +{ + u32 freq; + + freq = xe_mmio_read32(&gt->mmio, RPNSWREQ); + freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); + return decode_freq(freq); +} + /** * xe_guc_pc_get_cur_freq - Get Current requested frequency * @pc: The GuC PC @@ -463,10 +481,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) return -ETIMEDOUT; } - *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ); - - *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); - *freq = decode_freq(*freq); + *freq = get_cur_freq(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; @@ -1002,6 +1017,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); unsigned int fw_ref; + ktime_t earlier; int ret; xe_gt_assert(gt, xe_device_uc_enabled(xe)); @@ -1026,14 +1042,25 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) memset(pc->bo->vmap.vaddr, 0, size); slpc_shared_data_write(pc, header.size, size); + earlier = ktime_get(); ret = pc_action_reset(pc); if (ret) goto out; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { - xe_gt_err(gt, "GuC PC Start failed\n"); - ret = -EIO; - goto out; + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) { + xe_gt_warn(gt, "GuC PC start taking longer than normal [freq 
= %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n", + xe_guc_pc_get_act_freq(pc), get_cur_freq(gt), + xe_gt_throttle_get_limit_reasons(gt)); + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_EXTENDED_TIMEOUT_MS)) { + xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + goto out; + } + + xe_gt_warn(gt, "GuC PC excessive start time: %lldms", + ktime_ms_delta(ktime_get(), earlier)); } ret = pc_init_freqs(pc); From 3f674e7b670b7b7d9261935820e4eba3c059f835 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 6 Mar 2025 14:25:57 -0800 Subject: [PATCH 394/503] nvme-pci: fix stuck reset on concurrent DPC and HP The PCIe error handling has the nvme driver quiesce the device, attempt to restart it, then wait for that restart to complete. A PCIe DPC event also toggles the PCIe link. If the slot doesn't have out-of-band presence detection, this will trigger a pciehp re-enumeration. The error handling that calls nvme_error_resume is holding the device lock while this happens. This lock blocks pciehp's request to disconnect the driver from proceeding. Meanwhile the nvme's reset can't make forward progress because its device isn't there anymore with outstanding IO, and the timeout handler won't do anything to fix it because the device is undergoing error handling. End result: deadlocked. Fix this by having the timeout handler short cut the disabling for a disconnected PCIe device. The downside is that we're relying on an IO timeout to clean up this mess, which could be a minute by default. 
Tested-by: Nilay Shroff Reviewed-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 640590b217282..e59aad269abf8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1411,9 +1411,20 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd = { }; + struct pci_dev *pdev = to_pci_dev(dev->dev); u32 csts = readl(dev->bar + NVME_REG_CSTS); u8 opcode; + /* + * Shutdown the device immediately if we see it is disconnected. This + * unblocks PCIe error handling if the nvme driver is waiting in + * error_resume for a device that has been removed. We can't unbind the + * driver while the driver's error callback is waiting to complete, so + * we're relying on a timeout to break that deadlock if a removal + * occurs while reset work is running. + */ + if (pci_dev_is_disconnected(pdev)) + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); if (nvme_state_terminal(&dev->ctrl)) goto disable; @@ -1421,7 +1432,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * the recovery mechanism will surely fail. */ mb(); - if (pci_channel_offline(to_pci_dev(dev->dev))) + if (pci_channel_offline(pdev)) return BLK_EH_RESET_TIMER; /* From de93ddf88088f7624b589d0ff3af9effb87e8f3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Feb 2025 18:02:39 +0200 Subject: [PATCH 395/503] drm/atomic: Filter out redundant DPMS calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Video players (eg. mpv) do periodic XResetScreenSaver() calls to keep the screen on while the video playing. The modesetting ddx plumbs these straight through into the kernel as DPMS setproperty ioctls, without any filtering whatsoever. 
When implemented via atomic these end up as empty commits on the crtc (which will nonetheless take one full frame), which leads to a dropped frame every time XResetScreenSaver() is called. Let's just filter out redundant DPMS property changes in the kernel to avoid this issue. v2: Explain the resulting commits a bit better (Sima) Document the behaviour in uapi docs (Sima) Cc: stable@vger.kernel.org Testcase: igt/kms_flip/flip-vs-dpms-on-nop Reviewed-by: Simona Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250219160239.17502-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_atomic_uapi.c | 4 ++++ drivers/gpu/drm/drm_connector.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 370dc676e3aa5..fd36b8fd54e9e 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -956,6 +956,10 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; + + if (connector->dpms == mode) + goto out; + connector->dpms = mode; crtc = connector->state->crtc; diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 5f24d6b41cc6d..48b08c9611a7b 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1427,6 +1427,10 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * callback. For atomic drivers the remapping to the "ACTIVE" property is * implemented in the DRM core. * + * On atomic drivers any DPMS setproperty ioctl where the value does not + * change is completely skipped, otherwise a full atomic commit will occur. + * On legacy drivers the exact behavior is driver specific. + * * Note that this property cannot be set through the MODE_ATOMIC ioctl, * userspace must use "ACTIVE" on the CRTC instead. 
* From 73fe9073c0cc28056cb9de0c8a516dac070f1d1f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Mar 2025 20:52:08 -0700 Subject: [PATCH 396/503] Drivers: hv: vmbus: Don't release fb_mmio resource in vmbus_free_mmio() The VMBus driver manages the MMIO space it owns via the hyperv_mmio resource tree. Because the synthetic video framebuffer portion of the MMIO space is initially setup by the Hyper-V host for each guest, the VMBus driver does an early reserve of that portion of MMIO space in the hyperv_mmio resource tree. It saves a pointer to that resource in fb_mmio. When a VMBus driver requests MMIO space and passes "true" for the "fb_overlap_ok" argument, the reserved framebuffer space is used if possible. In that case it's not necessary to do another request against the "shadow" hyperv_mmio resource tree because that resource was already requested in the early reserve steps. However, the vmbus_free_mmio() function currently does no special handling for the fb_mmio resource. When a framebuffer device is removed, or the driver is unbound, the current code for vmbus_free_mmio() releases the reserved resource, leaving fb_mmio pointing to memory that has been freed. If the same or another driver is subsequently bound to the device, vmbus_allocate_mmio() checks against fb_mmio, and potentially gets garbage. Furthermore a second unbind operation produces this "nonexistent resource" error because of the unbalanced behavior between vmbus_allocate_mmio() and vmbus_free_mmio(): [ 55.499643] resource: Trying to free nonexistent resource <0x00000000f0000000-0x00000000f07fffff> Fix this by adding logic to vmbus_free_mmio() to recognize when MMIO space in the fb_mmio reserved area would be released, and don't release it. This filtering ensures the fb_mmio resource always exists, and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). 
Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") Signed-off-by: Michael Kelley Tested-by: Saurabh Sengar Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> --- drivers/hv/vmbus_drv.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f6cd44fff292..6e55a1a2613d3 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) struct resource *iter; mutex_lock(&hyperv_mmio_lock); + + /* + * If all bytes of the MMIO range to be released are within the + * special case fb_mmio shadow region, skip releasing the shadow + * region since no corresponding __request_region() was done + * in vmbus_allocate_mmio(). + */ + if (fb_mmio && start >= fb_mmio->start && + (start + size - 1 <= fb_mmio->end)) + goto skip_shadow_release; + for (iter = hyperv_mmio; iter; iter = iter->sibling) { if ((iter->start >= start + size) || (iter->end <= start)) continue; __release_region(iter, start, size); } + +skip_shadow_release: release_mem_region(start, size); mutex_unlock(&hyperv_mmio_lock); From 6fbafe1cbed10e53b3cf236a8a1987425206dd8e Mon Sep 17 00:00:00 2001 From: Panagiotis Foliadis Date: Sat, 8 Mar 2025 16:49:05 +0000 Subject: [PATCH 397/503] rust: task: fix `SAFETY` comment in `Task::wake_up` The `SAFETY` comment inside the `wake_up` method references erroneously the `signal_pending` C function instead of the `wake_up_process` which is actually called. Fix the comment to reference the correct C function. 
Fixes: fe95f58320e6 ("rust: task: adjust safety comments in Task methods") Signed-off-by: Panagiotis Foliadis Reviewed-by: Charalampos Mitrodimas Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250308-comment-fix-v1-1-4bba709fd36d@posteo.net [ Slightly reworded. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 07bc22a7645c0..38da555a2bdbb 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -320,7 +320,7 @@ impl Task { /// Wakes up the task. pub fn wake_up(&self) { - // SAFETY: It's always safe to call `signal_pending` on a valid task, even if the task + // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task // running. unsafe { bindings::wake_up_process(self.as_ptr()) }; } From bf9b8020a80d32f6aa80591297a087d0519dc931 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 6 Mar 2025 17:55:48 +0900 Subject: [PATCH 398/503] nvmet: pci-epf: Set NVMET_PCI_EPF_Q_LIVE when a queue is fully created The function nvmet_pci_epf_create_sq() uses test_and_set_bit() to check that a submission queue is not already live and if not, sets the NVMET_PCI_EPF_Q_LIVE queue flag to declare the sq live (ready to use). However, this is done on entry to the function, before the submission queue is actually fully initialized and ready to use. This creates a race situation with the function nvmet_pci_epf_poll_sqs_work() which looks at the NVMET_PCI_EPF_Q_LIVE queue flag to poll the submission queue when it is live. This race can lead to invalid DMA transfers if nvmet_pci_epf_poll_sqs_work() runs after the NVMET_PCI_EPF_Q_LIVE flag is set but before setting the sq pci address and doorbell offset. Avoid this race by only testing the NVMET_PCI_EPF_Q_LIVE flag on entry to nvmet_pci_epf_create_sq() and setting it after the submission queue is fully set up before nvmet_pci_epf_create_sq() returns success. 
Since the function nvmet_pci_epf_create_cq() also has the same racy flag setting pattern, also make a similar change in that function. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 565d2bd36dcde..d55ad334670c5 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1265,7 +1265,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid]; u16 status; - if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; if (!(flags & NVME_QUEUE_PHYS_CONTIG)) @@ -1300,6 +1300,8 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, if (status != NVME_SC_SUCCESS) goto err; + set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); + dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", cqid, qsize, cq->qes, cq->vector); @@ -1307,7 +1309,6 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, err: clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); - clear_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); return status; } @@ -1333,7 +1334,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid]; u16 status; - if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) + if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; if (!(flags & NVME_QUEUE_PHYS_CONTIG)) @@ -1355,7 +1356,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth); if (status != NVME_SC_SUCCESS) - goto out_clear_bit; + return status; sq->iod_wq = alloc_workqueue("sq%d_wq", 
WQ_UNBOUND, min_t(int, sq->depth, WQ_MAX_ACTIVE), sqid); @@ -1365,6 +1366,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, goto out_destroy_sq; } + set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags); + dev_dbg(ctrl->dev, "SQ[%u]: %u entries of %zu B\n", sqid, qsize, sq->qes); @@ -1372,8 +1375,6 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, out_destroy_sq: nvmet_sq_destroy(&sq->nvme_sq); -out_clear_bit: - clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags); return status; } From 39393f5c5c795992507aa5005a9d58396a5b07f1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 6 Mar 2025 17:55:49 +0900 Subject: [PATCH 399/503] nvmet: pci-epf: Do not add an IRQ vector if not needed The function nvmet_pci_epf_create_cq() always unconditionally calls nvmet_pci_epf_add_irq_vector() to add an IRQ vector for a completion queue. But this is not correct if the host requested the creation of a completion queue for polling, without an IRQ vector specified (i.e. the flag NVME_CQ_IRQ_ENABLED is not set). Fix this by calling nvmet_pci_epf_add_irq_vector() and setting the queue flag NVMET_PCI_EPF_Q_IRQ_ENABLED for the cq only if NVME_CQ_IRQ_ENABLED is set. While at it, also fix the error path to add the missing removal of the added IRQ vector if nvmet_cq_create() fails. 
Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index d55ad334670c5..b1e31483f1574 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1271,9 +1271,6 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, if (!(flags & NVME_QUEUE_PHYS_CONTIG)) return NVME_SC_INVALID_QUEUE | NVME_STATUS_DNR; - if (flags & NVME_CQ_IRQ_ENABLED) - set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); - cq->pci_addr = pci_addr; cq->qid = cqid; cq->depth = qsize + 1; @@ -1290,10 +1287,11 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, cq->qes = ctrl->io_cqes; cq->pci_size = cq->qes * cq->depth; - cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector); - if (!cq->iv) { - status = NVME_SC_INTERNAL | NVME_STATUS_DNR; - goto err; + if (flags & NVME_CQ_IRQ_ENABLED) { + cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector); + if (!cq->iv) + return NVME_SC_INTERNAL | NVME_STATUS_DNR; + set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); } status = nvmet_cq_create(tctrl, &cq->nvme_cq, cqid, cq->depth); @@ -1308,7 +1306,8 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, return NVME_SC_SUCCESS; err: - clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); return status; } From bb39ed47065455604729404729d9116868638d31 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 5 Mar 2025 21:21:43 +0900 Subject: [PATCH 400/503] ksmbd: fix use-after-free in ksmbd_free_work_struct ->interim_entry of ksmbd_work could be deleted after oplock is freed. We don't need to manage it with linked list. 
The interim request could be immediately sent whenever an oplock break wait is needed. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_work.c | 3 --- fs/smb/server/ksmbd_work.h | 1 - fs/smb/server/oplock.c | 37 +++++++++++++++---------------------- fs/smb/server/oplock.h | 1 - 4 files changed, 15 insertions(+), 27 deletions(-) diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index 4af2e6007c29d..72b00ca6e4551 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -26,7 +26,6 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void) INIT_LIST_HEAD(&work->request_entry); INIT_LIST_HEAD(&work->async_request_entry); INIT_LIST_HEAD(&work->fp_entry); - INIT_LIST_HEAD(&work->interim_entry); INIT_LIST_HEAD(&work->aux_read_list); work->iov_alloc_cnt = 4; work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec), @@ -56,8 +55,6 @@ void ksmbd_free_work_struct(struct ksmbd_work *work) kfree(work->tr_buf); kvfree(work->request_buf); kfree(work->iov); - if (!list_empty(&work->interim_entry)) - list_del(&work->interim_entry); if (work->async_id) ksmbd_release_id(&work->conn->async_ida, work->async_id); diff --git a/fs/smb/server/ksmbd_work.h b/fs/smb/server/ksmbd_work.h index 8ca2c813246e6..d36393ff8310c 100644 --- a/fs/smb/server/ksmbd_work.h +++ b/fs/smb/server/ksmbd_work.h @@ -89,7 +89,6 @@ struct ksmbd_work { /* List head at conn->async_requests */ struct list_head async_request_entry; struct list_head fp_entry; - struct list_head interim_entry; }; /** diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 3a3fe4afbdf0d..2febd1c8e278c 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -46,7 +46,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, opinfo->fid = id; opinfo->Tid = Tid; INIT_LIST_HEAD(&opinfo->op_entry); - INIT_LIST_HEAD(&opinfo->interim_list); 
init_waitqueue_head(&opinfo->oplock_q); init_waitqueue_head(&opinfo->oplock_brk); atomic_set(&opinfo->refcount, 1); @@ -803,7 +802,6 @@ static void __smb2_lease_break_noti(struct work_struct *wk) static int smb2_lease_break_noti(struct oplock_info *opinfo) { struct ksmbd_conn *conn = opinfo->conn; - struct list_head *tmp, *t; struct ksmbd_work *work; struct lease_break_info *br_info; struct lease *lease = opinfo->o_lease; @@ -831,16 +829,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { - list_for_each_safe(tmp, t, &opinfo->interim_list) { - struct ksmbd_work *in_work; - - in_work = list_entry(tmp, struct ksmbd_work, - interim_entry); - setup_async_work(in_work, NULL, NULL); - smb2_send_interim_resp(in_work, STATUS_PENDING); - list_del_init(&in_work->interim_entry); - release_async_work(in_work); - } INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); wait_for_break_ack(opinfo); @@ -871,7 +859,8 @@ static void wait_lease_breaking(struct oplock_info *opinfo) } } -static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) +static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level, + struct ksmbd_work *in_work) { int err = 0; @@ -914,9 +903,15 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) } if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE | - SMB2_LEASE_HANDLE_CACHING_LE)) + SMB2_LEASE_HANDLE_CACHING_LE)) { + if (in_work) { + setup_async_work(in_work, NULL, NULL); + smb2_send_interim_resp(in_work, STATUS_PENDING); + release_async_work(in_work); + } + brk_opinfo->op_state = OPLOCK_ACK_WAIT; - else + } else atomic_dec(&brk_opinfo->breaking_cnt); } else { err = oplock_break_pending(brk_opinfo, req_op_level); @@ -1116,7 +1111,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (ksmbd_conn_releasing(opinfo->conn)) continue; - oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + 
oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } } @@ -1152,7 +1147,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (ksmbd_conn_releasing(opinfo->conn)) continue; - oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } } @@ -1252,8 +1247,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, goto op_break_not_needed; } - list_add(&work->interim_entry, &prev_opinfo->interim_list); - err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II); + err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II, work); opinfo_put(prev_opinfo); if (err == -ENOENT) goto set_lev; @@ -1322,8 +1316,7 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work, } brk_opinfo->open_trunc = is_trunc; - list_add(&work->interim_entry, &brk_opinfo->interim_list); - oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II); + oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II, work); opinfo_put(brk_opinfo); } @@ -1386,7 +1379,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, SMB2_LEASE_KEY_SIZE)) goto next; brk_op->open_trunc = is_trunc; - oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE); + oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL); next: opinfo_put(brk_op); rcu_read_lock(); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 72bc88a63a408..3f64f07872638 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -67,7 +67,6 @@ struct oplock_info { bool is_lease; bool open_trunc; /* truncate on open */ struct lease *o_lease; - struct list_head interim_list; struct list_head op_entry; struct list_head lease_entry; wait_queue_head_t oplock_q; /* Other server threads */ From 3aa660c059240e0c795217182cf7df32909dd917 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 6 Mar 2025 14:14:58 +0900 Subject: [PATCH 401/503] ksmbd: prevent connection release during oplock break notification ksmbd_work could be freed when 
after connection release. Increment r_count of ksmbd_conn to indicate that requests are not finished yet and to not release the connection. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 20 ++++++++++++++++++++ fs/smb/server/connection.h | 2 ++ fs/smb/server/oplock.c | 6 ++++++ fs/smb/server/server.c | 14 ++------------ 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index f8a40f65db6ae..c1f22c1291117 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -433,6 +433,26 @@ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops) default_conn_ops.terminate_fn = ops->terminate_fn; } +void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn) +{ + atomic_inc(&conn->r_count); +} + +void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn) +{ + /* + * Checking waitqueue to dropping pending requests on + * disconnection. waitqueue_active is safe because it + * uses atomic operation for condition. 
+ */ + atomic_inc(&conn->refcnt); + if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) + wake_up(&conn->r_count_q); + + if (atomic_dec_and_test(&conn->refcnt)) + kfree(conn); +} + int ksmbd_conn_transport_init(void) { int ret; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index b379ae4fdcdff..91c2318639e76 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -168,6 +168,8 @@ int ksmbd_conn_transport_init(void); void ksmbd_conn_transport_destroy(void); void ksmbd_conn_lock(struct ksmbd_conn *conn); void ksmbd_conn_unlock(struct ksmbd_conn *conn); +void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn); +void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn); /* * WARNING diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 2febd1c8e278c..28886ff1ee577 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -634,6 +634,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) { struct smb2_oplock_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); + struct ksmbd_conn *conn = work->conn; struct oplock_break_info *br_info = work->request_buf; struct smb2_hdr *rsp_hdr; struct ksmbd_file *fp; @@ -689,6 +690,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); + ksmbd_conn_r_count_dec(conn); } /** @@ -723,6 +725,7 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { + ksmbd_conn_r_count_inc(conn); INIT_WORK(&work->work, __smb2_oplock_break_noti); ksmbd_queue_work(work); @@ -744,6 +747,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk) { struct smb2_lease_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); + struct ksmbd_conn *conn = work->conn; struct lease_break_info *br_info = work->request_buf; struct smb2_hdr *rsp_hdr; @@ -790,6 +794,7 @@ static void 
__smb2_lease_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); + ksmbd_conn_r_count_dec(conn); } /** @@ -829,6 +834,7 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { + ksmbd_conn_r_count_inc(conn); INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); wait_for_break_ack(opinfo); diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 601e7fcbcf1e6..ab533c6029879 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -270,17 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk) ksmbd_conn_try_dequeue_request(work); ksmbd_free_work_struct(work); - /* - * Checking waitqueue to dropping pending requests on - * disconnection. waitqueue_active is safe because it - * uses atomic operation for condition. - */ - atomic_inc(&conn->refcnt); - if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) - wake_up(&conn->r_count_q); - - if (atomic_dec_and_test(&conn->refcnt)) - kfree(conn); + ksmbd_conn_r_count_dec(conn); } /** @@ -310,7 +300,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) conn->request_buf = NULL; ksmbd_conn_enqueue_request(work); - atomic_inc(&conn->r_count); + ksmbd_conn_r_count_inc(conn); /* update activity on connection */ conn->last_active = jiffies; INIT_WORK(&work->work, handle_ksmbd_work); From fd10709e28d2fa9015667aee56d92099fc97aa0d Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Fri, 7 Mar 2025 15:37:32 +0100 Subject: [PATCH 402/503] MAINTAINERS: Remove myself from the goodix touchscreen maintainers Haven't authored any commits to that driver in 10 years, and haven't had supported hardware for nearly as long. 
Signed-off-by: Bastien Nocera Link: https://lore.kernel.org/r/20250307143740.960328-1-hadess@hadess.net Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index baf0eeb9a3554..ca8945fd98bcd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9729,7 +9729,6 @@ S: Maintained F: drivers/media/usb/go7007/ GOODIX TOUCHSCREEN -M: Bastien Nocera M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained From f5d4e81774c42d9c2ea3980e570f3330ff2ed5d2 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 3 Mar 2025 08:49:41 +0800 Subject: [PATCH 403/503] drm/xe: remove redundant check in xe_vm_create_ioctl() The check for args->extensions is repeated twice in xe_vm_create_ioctl(). This commit removes the redundant check to streamline the code. Fixes: 7224788f6756 ("drm/xe: Kill XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS extension") Cc: Rodrigo Vivi Signed-off-by: Xin Wang Reviewed-by: Tejas Upadhyay Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20250303004942.951699-1-x.wang@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 8da8aecf1f2d89c2b8188bcf7aa252ec146ddd12) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ec6ec18ab3faa..5956631c0d40a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1809,9 +1809,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) From df1e82e7acd3c50b65ca0e2e09089b78382d14ab Mon Sep 17 00:00:00 2001 From: David Rosca Date: Thu, 13 Feb 2025 15:30:37 +0100 Subject: [PATCH 404/503] drm/amdgpu/display: Allow DCC for video 
formats on GFX12 We advertise DCC as supported for NV12/P010 formats on GFX12, but it would fail on this check on atomic commit. Signed-off-by: David Rosca Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit ba795235a2b99ba9bbef647ab003b2f3145d9bbb) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 774cc3f4f3fd9..92472109f84a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -277,8 +277,11 @@ static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, if (!dcc->enable) return 0; - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) + if (adev->family < AMDGPU_FAMILY_GC_12_0_0 && + format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return -EINVAL; + + if (!dc->cap_funcs.get_dcc_compression_cap) return -EINVAL; input.format = format; From e204aab79e01bc8ff750645666993ed8b719de57 Mon Sep 17 00:00:00 2001 From: Aliaksei Urbanski Date: Thu, 6 Mar 2025 13:36:03 +0300 Subject: [PATCH 405/503] drm/amd/display: fix missing .is_two_pixels_per_container Starting from 6.11, AMDGPU driver, while being loaded with amdgpu.dc=1, due to lack of .is_two_pixels_per_container function in dce60_tg_funcs, causes a NULL pointer dereference on PCs with old GPUs, such as R9 280X. So this fix adds missing .is_two_pixels_per_container to dce60_tg_funcs. 
Reported-by: Rosen Penev Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3942 Fixes: e6a901a00822 ("drm/amd/display: use even ODM slice width for two pixels per container") Signed-off-by: Aliaksei Urbanski Signed-off-by: Alex Deucher (cherry picked from commit bd4b125eb949785c6f8a53b0494e32795421209d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index e5fb0e8333e43..e691a1cf33567 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -239,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = { dce60_timing_generator_enable_advanced_request, .configure_crc = dce60_configure_crc, .get_crc = dce110_get_crc, + .is_two_pixels_per_container = dce110_is_two_pixels_per_container, }; void dce60_timing_generator_construct( From 4afacc9948e1f8fdbca401d259ae65ad93d298c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 6 Mar 2025 12:51:24 -0600 Subject: [PATCH 406/503] drm/amd: Keep display off while going into S4 When userspace invokes S4 the flow is: 1) amdgpu_pmops_prepare() 2) amdgpu_pmops_freeze() 3) Create hibernation image 4) amdgpu_pmops_thaw() 5) Write out image to disk 6) Turn off system Then on resume amdgpu_pmops_restore() is called. This flow has a problem that because amdgpu_pmops_thaw() is called it will call amdgpu_device_resume() which will resume all of the GPU. This includes turning the display hardware back on and discovering connectors again. This is an unexpected experience for the display to turn back on. Adjust the flow so that during the S4 sequence display hardware is not turned back on. 
Reported-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2038 Cc: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: Alex Deucher Acked-by: Harry Wentland Link: https://lore.kernel.org/r/20250306185124.44780-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 68bfdc8dc0a1a7fdd9ab61e69907ae71a6fd3d91) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 95a05b03f799d..c0ddbe7d6f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2555,7 +2555,6 @@ static int amdgpu_pmops_freeze(struct device *dev) int r; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; if (r) return r; @@ -2567,8 +2566,13 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - return amdgpu_device_resume(drm_dev, true); + r = amdgpu_device_resume(drm_dev, true); + adev->in_s4 = false; + + return r; } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2581,6 +2585,9 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9d9645a2d18ef..f4fed93f2915a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3373,6 +3373,11 @@ static int 
dm_resume(struct amdgpu_ip_block *ip_block) return 0; } + + /* leave display off for S4 sequence */ + if (adev->in_s4) + return 0; + /* Recreate dc_state - DC invalidates it when setting power state to S3. */ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); From 40b8c14936bd2726354c856251f6baed9869e760 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 20 Feb 2025 16:20:26 -0500 Subject: [PATCH 407/503] drm/amd/display: Disable unneeded hpd interrupts during dm_init [Why] It seems HPD interrupts are enabled by default for all connectors, even if the hpd source isn't valid. An eDP for example, does not have a valid hpd source (but does have a valid hpdrx source; see construct_phy()). Thus, eDPs should have their hpd interrupt disabled. In the past, this wasn't really an issue. Although the driver gets interrupted, then acks by writing to hw registers, there weren't any subscribed handlers that did anything meaningful (see register_hpd_handlers()). But things changed with the introduction of IPS. s2idle requires that the driver allows IPS for DMUB fw to put hw to sleep. Since register access requires hw to be awake, the driver will block IPS entry to do so. And no IPS means no hw sleep during s2idle. This was the observation on DCN35 systems with an eDP. During suspend, the eDP toggled its hpd pin as part of the panel power down sequence. The driver was then interrupted, and acked by writing to registers, blocking IPS entry. [How] Since DC marks eDP connections as having invalid hpd sources (see construct_phy()), DM should disable them at the hw level. Do so in amdgpu_dm_hpd_init() by disabling all hpd ints first, then selectively enabling ones for connectors that have valid hpd sources. 
Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Leo Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 7b1ba19eb15f88e70782642ce2d934211269337b) Cc: stable@vger.kernel.org --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 64 +++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index c4a7fd453e5fc..a215234151ac3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,8 +894,16 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int irq_type; int i; + /* First, clear all hpd and hpdrx interrupts */ + for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) { + if (!dc_interrupt_set(adev->dm.dc, i, false)) + drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n", + i); + } + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { struct amdgpu_dm_connector *amdgpu_dm_connector; @@ -908,10 +916,31 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; + /* + * Get a base driver irq reference for hpd ints for the lifetime + * of dm. Note that only hpd interrupt types are registered with + * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on + * hpd_rx isn't available. DM currently controls hpd_rx + * explicitly with dc_interrupt_set() + */ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - true); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + /* + * TODO: There's a mismatch between mode_info.num_hpd + * and what bios reports as the # of connectors with hpd + * sources. 
Since the # of hpd source types registered + * with base driver == mode_info.num_hpd, we have to + * fallback to dc_interrupt_set for the remaining types. + */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + true); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -921,12 +950,6 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i); - } } /** @@ -942,7 +965,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; - int i; + int irq_type; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -956,9 +979,18 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - false); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + + /* TODO: See same TODO in amdgpu_dm_hpd_init() */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + false); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -968,10 +1000,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for 
(i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i); - } } From b5a981e1b34e44f94a5967f730fff4166f2101e8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 22 Feb 2025 23:37:32 -0600 Subject: [PATCH 408/503] drm/amd/display: fix default brightness [Why] To avoid flickering during boot default brightness level set by BIOS should be maintained for as much of the boot as feasible. commit 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") attempted to set the right levels for AC vs DC, but brightness still got reset to maximum level in initialization code for setup_backlight_device(). [How] Remove the hardcoded initialization in setup_backlight_device() and instead program brightness value to match BIOS (AC or DC). This avoids a brightness flicker from kernel changing the value. Userspace may however still change it during boot. 
Fixes: 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 0747acf3311229e22009bec4a9e7fc30c879e842) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4fed93f2915a..4bd7f82c582dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4911,6 +4911,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->backlight_dev[aconnector->bl_idx] = backlight_device_register(bl_name, aconnector->base.kdev, dm, &amdgpu_dm_backlight_ops, &props); + dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { DRM_ERROR("DM: Backlight registration failed!\n"); @@ -4978,7 +4979,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, aconnector->bl_idx = bl_idx; amdgpu_dm_update_backlight_caps(dm, bl_idx); - dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL; dm->backlight_link[bl_idx] = link; dm->num_of_edps++; From 5760388d9681ac743038b846b9082b9023969551 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 23 Feb 2025 00:04:35 -0600 Subject: [PATCH 409/503] drm/amd/display: Restore correct backlight brightness after a GPU reset [Why] GPU reset will attempt to restore cached state, but brightness doesn't get restored. It will come back at 100% brightness, but userspace thinks it's the previous value. [How] When running resume sequence if GPU is in reset restore brightness to previous value. 
Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 5e19e2b57b6bb640d68dfc7991e1e182922cf867) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4bd7f82c582dc..74ad0d1240feb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -245,6 +245,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, + u32 user_brightness); + static bool is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state, struct drm_crtc_state *new_crtc_state); @@ -3371,6 +3375,12 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) mutex_unlock(&dm->dc_lock); + /* set the backlight after a reset */ + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + return 0; } From 79e31396fdd7037c503e6add15af7cb00633ea92 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Feb 2025 16:36:25 -0700 Subject: [PATCH 410/503] drm/amd/display: Assign normalized_pix_clk when color depth = 14 [WHY & HOW] A warning message "WARNING: CPU: 4 PID: 459 at ... /dc_resource.c:3397 calculate_phy_pix_clks+0xef/0x100 [amdgpu]" occurs because the display_color_depth == COLOR_DEPTH_141414 is not handled. This is observed in Radeon RX 6600 XT. It is fixed by assigning pix_clk * (14 * 3) / 24 - same as the rest. Also fixes the indentation in get_norm_pix_clk. 
Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 274a87eb389f58eddcbc5659ab0b180b37e92775) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a45037cb4cc01..298668e9729c7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3389,10 +3389,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing) break; case COLOR_DEPTH_121212: normalized_pix_clk = (pix_clk * 36) / 24; - break; + break; + case COLOR_DEPTH_141414: + normalized_pix_clk = (pix_clk * 42) / 24; + break; case COLOR_DEPTH_161616: normalized_pix_clk = (pix_clk * 48) / 24; - break; + break; default: ASSERT(0); break; From e65e7bea220c3ce8c4c793b4ba35557f4994ab2b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:18:14 -0600 Subject: [PATCH 411/503] drm/amd/display: Fix slab-use-after-free on hdcp_work [Why] A slab-use-after-free is reported when HDCP is destroyed but the property_validate_dwork queue is still running. [How] Cancel the delayed work when destroying workqueue. 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Fixes: da3fd7ac0bcf ("drm/amd/display: Update CP property based on HW query") Cc: Alex Deucher Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 725a04ba5a95e89c89633d4322430cfbca7ce128) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541c..c0dc232440490 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -455,6 +455,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) for (i = 0; i < hdcp_work->max_link; i++) { cancel_delayed_work_sync(&hdcp_work[i].callback_dwork); cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork); + cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork); } sysfs_remove_bin_file(kobj, &hdcp_work[0].attr); From ded6ad4c6e2005e959ea09abba16c451433dd34b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 9 Mar 2025 12:26:50 -0400 Subject: [PATCH 412/503] drm/amdgpu/vce2: fix ip block reference Need to use the correct IP block type. VCE vs VCN. Fixes mclk issues on Hawaii. Suggested by selendym. 
Fixes: 82ae6619a450 ("drm/amdgpu: update the handle ptr in wait_for_idle") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3997 Reviewed-by: Boyuan Zhang Cc: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 02438acd252395628d74cfac692efbb676d21521) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c633b7ff29438..09fd6ef99b3d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -284,7 +284,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) return 0; } - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE); if (!ip_block) return -EINVAL; From 366fef794bd2b7c2e9df933f6828dd9739bfba84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 10 Mar 2025 14:21:58 +0200 Subject: [PATCH 413/503] <linux/cleanup.h>: Allow the passing of both iomem and non-iomem pointers to no_free_ptr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling no_free_ptr() for an __iomem pointer results in Sparse complaining about the types: warning: incorrect type in argument 1 (different address spaces) expected void const volatile *val got void [noderef] __iomem *__val [ The example is from drivers/platform/x86/intel/pmc/core_ssram.c:283 ] The problem is caused by the signature of __must_check_fn() added in: 85be6d842447 ("cleanup: Make no_free_ptr() __must_check") ... to enforce that the return value is always used. Use __force to allow both iomem and non-iomem pointers to be given for no_free_ptr(). Reported-by: kernel test robot Signed-off-by: Ilpo Järvinen Signed-off-by: Ingo Molnar Reviewed-by: Andy Shevchenko Reviewed-by: Dan Williams Cc: "H.
Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250310122158.20966-1-ilpo.jarvinen@linux.intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202403050547.qnZtuNlN-lkp@intel.com/ --- include/linux/cleanup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index ec00e3f7af2b3..ee2614adb7858 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -212,7 +212,7 @@ const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) + ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) From eab0396353be1c778eba1c0b5180176f04dd21ce Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Fri, 7 Mar 2025 10:18:20 +0800 Subject: [PATCH 414/503] net/mlx5: handle errors in mlx5_chains_create_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() and mlx5_get_flow_namespace() must be checked to prevent NULL pointer dereferences. If either function fails, the function should log error message with mlx5_core_warn() and return error pointer. 
Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") Signed-off-by: Wentao Liang Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index a80ecb672f33d..711d14dea2485 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } + if (!ns) { + mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); + return ERR_PTR(-EOPNOTSUPP); + } + ft_attr.autogroup.num_reserved_entries = 2; ft_attr.autogroup.max_num_groups = chains->group_num; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); From d0a4a1b36d7a71b45972ef33762c3fc082bec1db Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 7 Mar 2025 10:12:55 +0100 Subject: [PATCH 415/503] net: ethtool: tsinfo: Fix dump command Fix missing initialization of ts_info->phc_index in the dump command, which could cause a netdev interface to incorrectly display a PTP provider at index 0 instead of "none". Fix it by initializing the phc_index to -1. In the same time, restore missing initialization of ts_info.cmd for the IOCTL case, as it was before the transition from ethnl_default_dumpit to custom ethnl_tsinfo_dumpit. Also, remove unnecessary zeroing of ts_info, as it is embedded within reply_data, which is fully zeroed two lines earlier. 
Fixes: b9e3f7dc9ed95 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") Signed-off-by: Kory Maincent Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250307091255.463559-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/tsinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 691be6c445b38..ad3866c5a902b 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -290,7 +290,8 @@ static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; - memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; + reply_data->ts_info.phc_index = -1; return ehdr; } From cfa693bf9d5361608e2963f5dae053b3695af8eb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Mar 2025 11:12:23 +0100 Subject: [PATCH 416/503] net: usb: lan78xx: Sanitize return values of register read/write functions usb_control_msg() returns the number of transferred bytes or a negative error code. The current implementation propagates the transferred byte count, which is unintended. This affects code paths that assume a boolean success/failure check, such as the EEPROM detection logic. Fix this by ensuring lan78xx_read_reg() and lan78xx_write_reg() return only 0 on success and preserve negative error codes. This approach is consistent with existing usage, as the transferred byte count is not explicitly checked elsewhere. 
Fixes: 8b1b2ca83b20 ("net: usb: lan78xx: Improve error handling in EEPROM and OTP operations") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/ac965de8-f320-430f-80f6-b16f4e1ba06d@sirena.org.uk Signed-off-by: Oleksij Rempel Tested-by: Mark Brown Link: https://patch.msgid.link/20250307101223.3025632-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a91bf9c7e31d2..137adf6d5b08a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -627,7 +627,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data) kfree(buf); - return ret; + return ret < 0 ? ret : 0; } static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) @@ -658,7 +658,7 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) kfree(buf); - return ret; + return ret < 0 ? ret : 0; } static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask, From 9f7b2aa5034e24d3c49db73d5f760c0435fe31c2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:12 +0000 Subject: [PATCH 417/503] eth: bnxt: fix truesize for mb-xdp-pass case When mb-xdp is set and return is XDP_PASS, packet is converted from xdp_buff to sk_buff with xdp_update_skb_shared_info() in bnxt_xdp_build_skb(). bnxt_xdp_build_skb() passes incorrect truesize argument to xdp_update_skb_shared_info(). The truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags but the skb_shared_info was wiped by napi_build_skb() before. So it stores sinfo->nr_frags before bnxt_xdp_build_skb() and use it instead of getting skb_shared_info from xdp_get_shared_info_from_buff(). 
Splat looks like: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 0 at net/core/skbuff.c:6072 skb_try_coalesce+0x504/0x590 Modules linked in: xt_nat xt_tcpudp veth af_packet xt_conntrack nft_chain_nat xt_MASQUERADE nf_conntrack_netlink xfrm_user xt_addrtype nft_coms CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 6.14.0-rc2+ #3 RIP: 0010:skb_try_coalesce+0x504/0x590 Code: 4b fd ff ff 49 8b 34 24 40 80 e6 40 0f 84 3d fd ff ff 49 8b 74 24 48 40 f6 c6 01 0f 84 2e fd ff ff 48 8d 4e ff e9 25 fd ff ff <0f> 0b e99 RSP: 0018:ffffb62c4120caa8 EFLAGS: 00010287 RAX: 0000000000000003 RBX: ffffb62c4120cb14 RCX: 0000000000000ec0 RDX: 0000000000001000 RSI: ffffa06e5d7dc000 RDI: 0000000000000003 RBP: ffffa06e5d7ddec0 R08: ffffa06e6120a800 R09: ffffa06e7a119900 R10: 0000000000002310 R11: ffffa06e5d7dcec0 R12: ffffe4360575f740 R13: ffffe43600000000 R14: 0000000000000002 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffffa0755f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f147b76b0f8 CR3: 00000001615d4000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __warn+0x84/0x130 ? skb_try_coalesce+0x504/0x590 ? report_bug+0x18a/0x1a0 ? handle_bug+0x53/0x90 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? skb_try_coalesce+0x504/0x590 inet_frag_reasm_finish+0x11f/0x2e0 ip_defrag+0x37a/0x900 ip_local_deliver+0x51/0x120 ip_sublist_rcv_finish+0x64/0x70 ip_sublist_rcv+0x179/0x210 ip_list_rcv+0xf9/0x130 How to reproduce: ip link set $interface1 xdp obj xdp_pass.o ip link set $interface1 mtu 9000 up ip a a 10.0.0.1/24 dev $interface1 ip link set $interface2 mtu 9000 up ip a a 10.0.0.2/24 dev $interface2 ping 10.0.0.1 -s 65000 The following ping.py patch adds an xdp-mb-pass case, so ping.py is going to be able to reproduce this issue.
Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-2-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7b8b5b39c7bbe..6b5fe4ee7a99b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2038,6 +2038,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct rx_cmp_ext *rxcmp1; u32 tmp_raw_cons = *raw_cons; u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); + struct skb_shared_info *sinfo; struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; @@ -2164,6 +2165,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, false); if (!frag_len) goto oom_next_rx; + } xdp_active = true; } @@ -2173,6 +2175,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; goto next_rx; } + if (xdp_buff_has_frags(&xdp)) { + sinfo = xdp_get_shared_info_from_buff(&xdp); + agg_bufs = sinfo->nr_frags; + } else { + agg_bufs = 0; + } } if (len <= bp->rx_copybreak) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e6c64e4bd66c3..e9b49cb5b735b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -476,7 +476,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, } xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, - BNXT_RX_PAGE_SIZE * sinfo->nr_frags, + BNXT_RX_PAGE_SIZE * num_frags, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } From ca2456e073957781e1184de68551c65161b2bd30 Mon Sep 17 00:00:00 2001 From: 
Taehee Yoo Date: Sun, 9 Mar 2025 13:42:13 +0000 Subject: [PATCH 418/503] eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc() The bnxt_queue_mem_alloc() is called to allocate new queue memory when a queue is restarted. It internally accesses rx buffer descriptor corresponding to the index. The rx buffer descriptor is allocated and set when the interface is up and it's freed when the interface is down. So, if queue is restarted if interface is down, kernel panic occurs. Splat looks like: BUG: unable to handle page fault for address: 000000000000b240 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 UID: 0 PID: 1563 Comm: ncdevmem2 Not tainted 6.14.0-rc2+ #9 844ddba6e7c459cafd0bf4db9a3198e Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en] Code: 41 54 4d 89 c4 4d 69 c0 c0 05 00 00 55 48 89 f5 53 48 89 fb 4c 8d b5 40 05 00 00 48 83 ec 15 RSP: 0018:ffff9dcc83fef9e8 EFLAGS: 00010202 RAX: ffffffffc0457720 RBX: ffff934ed8d40000 RCX: 0000000000000000 RDX: 000000000000001f RSI: ffff934ea508f800 RDI: ffff934ea508f808 RBP: ffff934ea508f800 R08: 000000000000b240 R09: ffff934e84f4b000 R10: ffff9dcc83fefa30 R11: ffff934e84f4b000 R12: 000000000000001f R13: ffff934ed8d40ac0 R14: ffff934ea508fd40 R15: ffff934e84f4b000 FS: 00007fa73888c740(0000) GS:ffff93559f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000b240 CR3: 0000000145a2e000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x15a/0x460 ? exc_page_fault+0x6e/0x180 ? asm_exc_page_fault+0x22/0x30 ? __pfx_bnxt_queue_mem_alloc+0x10/0x10 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] ? 
bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] netdev_rx_queue_restart+0xc5/0x240 net_devmem_bind_dmabuf_to_queue+0xf8/0x200 netdev_nl_bind_rx_doit+0x3a7/0x450 genl_family_rcv_msg_doit+0xd9/0x130 genl_rcv_msg+0x184/0x2b0 ? __pfx_netdev_nl_bind_rx_doit+0x10/0x10 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 ... Reviewed-by: Somnath Kotur Reviewed-by: Jakub Kicinski Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Signed-off-by: Taehee Yoo Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250309134219.91670-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6b5fe4ee7a99b..acb9500ef9307 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15447,6 +15447,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) struct bnxt_ring_struct *ring; int rc; + if (!bp->rx_ring) + return -ENETDOWN; + rxr = &bp->rx_ring[idx]; clone = qmem; memcpy(clone, rxr, sizeof(*rxr)); From 661958552eda5bf64bfafb4821cbdded935f1f68 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:14 +0000 Subject: [PATCH 419/503] eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue restart logic When a queue is restarted, it sets MRU to 0 for stopping packet flow. MRU variable is a member of vnic_info[], the first vnic_info is default and the second is ntuple. Only when ntuple is enabled(ethtool -K eth0 ntuple on), vnic_info for ntuple is allocated in init logic. The bp->nr_vnics indicates how many vnic_info are allocated. However bnxt_queue_{start | stop}() accesses vnic_info[BNXT_VNIC_NTUPLE] regardless of ntuple state. 
Reviewed-by: Somnath Kotur Fixes: b9d2956e869c ("bnxt_en: stop packet flow during bnxt_queue_stop/start") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-4-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index acb9500ef9307..218109ee1c234 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15643,7 +15643,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) cpr = &rxr->bnapi->cp_ring; cpr->sw_stats->rx.rx_resets++; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); @@ -15671,7 +15671,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) struct bnxt_vnic_info *vnic; int i; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; vnic->mru = 0; bnxt_hwrm_vnic_update(bp, vnic, From c03e7d05aa0e2f7e9a9ce5ad8a12471a53f941dc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:15 +0000 Subject: [PATCH 420/503] eth: bnxt: do not update checksum in bnxt_xdp_build_skb() The bnxt_rx_pkt() updates ip_summed value at the end if checksum offload is enabled. When the XDP-MB program is attached and it returns XDP_PASS, the bnxt_xdp_build_skb() is called to update skb_shared_info. The main purpose of bnxt_xdp_build_skb() is to update skb_shared_info, but it updates ip_summed value too if checksum offload is enabled. This is actually duplicate work. When the bnxt_rx_pkt() updates ip_summed value, it checks if ip_summed is CHECKSUM_NONE or not. It means that ip_summed should be CHECKSUM_NONE at this moment. 
But ip_summed may already be updated to CHECKSUM_UNNECESSARY in the XDP-MB-PASS path. So skb_checksum_none_assert() WARNs about it. This is duplicate work and updating ip_summed in the bnxt_xdp_build_skb() is not needed. Splat looks like: WARNING: CPU: 3 PID: 5782 at ./include/linux/skbuff.h:5155 bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Modules linked in: bnxt_re bnxt_en rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs veth xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_] CPU: 3 UID: 0 PID: 5782 Comm: socat Tainted: G W 6.14.0-rc4+ #27 Tainted: [W]=WARN Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Code: 54 24 0c 4c 89 f1 4c 89 ff c1 ea 1f ff d3 0f 1f 00 49 89 c6 48 85 c0 0f 84 4c e5 ff ff 48 89 c7 e8 ca 3d a0 c8 e9 8f f4 ff ff <0f> 0b f RSP: 0018:ffff88881ba09928 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000c7590303 RCX: 0000000000000000 RDX: 1ffff1104e7d1610 RSI: 0000000000000001 RDI: ffff8881c91300b8 RBP: ffff88881ba09b28 R08: ffff888273e8b0d0 R09: ffff888273e8b070 R10: ffff888273e8b010 R11: ffff888278b0f000 R12: ffff888273e8b080 R13: ffff8881c9130e00 R14: ffff8881505d3800 R15: ffff888273e8b000 FS: 00007f5a2e7be080(0000) GS:ffff88881ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff2e708ff8 CR3: 000000013e3b0000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __warn+0xcd/0x2f0 ? bnxt_rx_pkt+0x479b/0x7610 ? report_bug+0x326/0x3c0 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x14/0x50 ? asm_exc_invalid_op+0x16/0x20 ? bnxt_rx_pkt+0x479b/0x7610 ? bnxt_rx_pkt+0x3e41/0x7610 ? __pfx_bnxt_rx_pkt+0x10/0x10 ? napi_complete_done+0x2cf/0x7d0 __bnxt_poll_work+0x4e8/0x1220 ? __pfx___bnxt_poll_work+0x10/0x10 ? __pfx_mark_lock.part.0+0x10/0x10 bnxt_poll_p5+0x36a/0xfa0 ? __pfx_bnxt_poll_p5+0x10/0x10 __napi_poll.constprop.0+0xa0/0x440 net_rx_action+0x899/0xd00 ... Following ping.py patch adds xdp-mb-pass case.
so ping.py is going to be able to reproduce this issue. Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Taehee Yoo Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250309134219.91670-5-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +-- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 218109ee1c234..9afb2c5072b13 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2218,7 +2218,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!skb) goto oom_next_rx; } else { - skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, + rxr->page_pool, &xdp); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e9b49cb5b735b..299822cacca48 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -460,20 +460,13 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) struct sk_buff * bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, - struct page_pool *pool, struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1) + struct page_pool *pool, struct xdp_buff *xdp) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); if (!skb) return NULL; - skb_checksum_none_assert(skb); - if (RX_CMP_L4_CS_OK(rxcmp1)) { - if (bp->dev->features & NETIF_F_RXCSUM) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = RX_CMP_ENCAP(rxcmp1); - } - } + 
xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, BNXT_RX_PAGE_SIZE * num_frags, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 0122782400b8a..220285e190fcd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, struct page_pool *pool, - struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1); + struct xdp_buff *xdp); #endif From f09af5fdfbd9b0fcee73aab1116904c53b199e97 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:16 +0000 Subject: [PATCH 421/503] eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx} When qstats-get operation is executed, callbacks of netdev_stats_ops are called. The bnxt_get_queue_stats{rx | tx} collect per-queue stats from sw_stats in the rings. But {rx | tx | cp}_ring are allocated when the interface is up. So, these rings are not allocated when the interface is down. The qstats-get is allowed even if the interface is down. However, the bnxt_get_queue_stats{rx | tx}() accesses cp_ring and tx_ring without null check. So, it needs to avoid accessing rings if the interface is down. 
Reproducer: ip link set $interface down ./cli.py --spec netdev.yaml --dump qstats-get OR ip link set $interface down python ./stats.py Splat looks like: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1680fa067 P4D 1680fa067 PUD 16be3b067 PMD 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 UID: 0 PID: 1495 Comm: python3 Not tainted 6.14.0-rc4+ #32 5cd0f999d5a15c574ac72b3e4b907341 Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en] Code: c6 87 b5 18 00 00 02 eb a2 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 01 RSP: 0018:ffffabef43cdb7e0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffffc04c8710 RCX: 0000000000000000 RDX: ffffabef43cdb858 RSI: 0000000000000000 RDI: ffff8d504e850000 RBP: ffff8d506c9f9c00 R08: 0000000000000004 R09: ffff8d506bcd901c R10: 0000000000000015 R11: ffff8d506bcd9000 R12: 0000000000000000 R13: ffffabef43cdb8c0 R14: ffff8d504e850000 R15: 0000000000000000 FS: 00007f2c5462b080(0000) GS:ffff8d575f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000167fd0000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x15a/0x460 ? sched_balance_find_src_group+0x58d/0xd10 ? exc_page_fault+0x6e/0x180 ? asm_exc_page_fault+0x22/0x30 ? bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en cdd546fd48563c280cfd30e9647efa420db07bf1] netdev_nl_stats_by_netdev+0x2b1/0x4e0 ? xas_load+0x9/0xb0 ? xas_find+0x183/0x1d0 ? 
xa_find+0x8b/0xe0 netdev_nl_qstats_get_dumpit+0xbf/0x1e0 genl_dumpit+0x31/0x90 netlink_dump+0x1a8/0x360 Fixes: af7b3b4adda5 ("eth: bnxt: support per-queue statistics") Signed-off-by: Taehee Yoo Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250309134219.91670-6-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9afb2c5072b13..bee12d9b57aba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15384,6 +15384,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, struct bnxt_cp_ring_info *cpr; u64 *sw; + if (!bp->bnapi) + return; + cpr = &bp->bnapi[i]->cp_ring; sw = cpr->stats.sw_stats; @@ -15407,6 +15410,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, struct bnxt_napi *bnapi; u64 *sw; + if (!bp->tx_ring) + return; + bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi; sw = bnapi->cp_ring.stats.sw_stats; From 87dd2850835dd7886726b428a8ef7d73a60520c7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:17 +0000 Subject: [PATCH 422/503] eth: bnxt: fix memory leak in queue reset When the queue is reset, the bnxt_alloc_one_tpa_info() is called to allocate tpa_info for the new queue. And then the old queue's tpa_info should be removed by the bnxt_free_one_tpa_info(), but it is not called. So memory leak occurs. It adds the bnxt_free_one_tpa_info() in the bnxt_queue_mem_free(). unreferenced object 0xffff888293cc0000 (size 16384): comm "ncdevmem", pid 2076, jiffies 4296604081 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 40 75 78 93 82 88 ff ff ........@ux..... 40 75 78 93 02 00 00 00 00 00 00 00 00 00 00 00 @ux............. 
backtrace (crc 5d7d4798): ___kmalloc_large_node+0x10d/0x1b0 __kmalloc_large_node_noprof+0x17/0x60 __kmalloc_noprof+0x3f6/0x520 bnxt_alloc_one_tpa_info+0x5f/0x300 [bnxt_en] bnxt_queue_mem_alloc+0x8e8/0x14f0 [bnxt_en] netdev_rx_queue_restart+0x233/0x620 net_devmem_bind_dmabuf_to_queue+0x2a3/0x600 netdev_nl_bind_rx_doit+0xc00/0x10a0 genl_family_rcv_msg_doit+0x1d4/0x2b0 genl_rcv_msg+0x3fb/0x6c0 netlink_rcv_skb+0x12c/0x360 genl_rcv+0x24/0x40 netlink_unicast+0x447/0x710 netlink_sendmsg+0x712/0xbc0 __sys_sendto+0x3fd/0x4d0 __x64_sys_sendto+0xdc/0x1b0 Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-7-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bee12d9b57aba..55f553debd3b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15539,6 +15539,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) struct bnxt_ring_struct *ring; bnxt_free_one_rx_ring_skbs(bp, rxr); + bnxt_free_one_tpa_info(bp, rxr); xdp_rxq_info_unreg(&rxr->xdp_rxq); From a70f891e0fa0435379ad4950e156a15a4ef88b4d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:18 +0000 Subject: [PATCH 423/503] net: devmem: do not WARN conditionally after netdev_rx_queue_restart() When devmem socket is closed, netdev_rx_queue_restart() is called to reset queue by the net_devmem_unbind_dmabuf(). But callback may return -ENETDOWN if the interface is down because queues are already freed when the interface is down so queue reset is not needed. So, it should not warn if the return value is -ENETDOWN. 
Signed-off-by: Taehee Yoo Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250309134219.91670-8-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 3bba3f018df03..0e5a2c672efd8 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -109,6 +109,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; + int err; if (binding->list.next) list_del(&binding->list); @@ -120,7 +121,8 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq_idx = get_netdev_rx_queue_index(rxq); - WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); + err = netdev_rx_queue_restart(binding->dev, rxq_idx); + WARN_ON(err && err != -ENETDOWN); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); From 75cc19c8ff8932d7da23480a49d1f9a050289c37 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:19 +0000 Subject: [PATCH 424/503] selftests: drv-net: add xdp cases for ping.py ping.py has 3 cases: test_v4, test_v6 and test_tcp. But these cases are not executed in an XDP environment. So add XDP cases, in which the existing tests (test_v4, test_v6, and test_tcp) are also executed in each of the XDP environments below. 1. xdp-generic + single-buffer 2. xdp-generic + multi-buffer 3. xdp-native + single-buffer 4. xdp-native + multi-buffer 5. xdp-offload It also makes test_{v4 | v6 | tcp} send large packets. This may help to check whether multi-buffer is working or not. Note that the physical interface may go down and then up when xdp is attached or detached. It then takes some time before traffic flows again. So sleep(10) is added if the test interface is a physical interface. netdevsim and veth type interfaces skip the sleep.
Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-9-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 200 ++++++++++++++++-- .../testing/selftests/net/lib/xdp_dummy.bpf.c | 6 + 2 files changed, 191 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b487978..93f4b411b378f 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -1,49 +1,219 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import os +import random, string, time from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq -from lib.py import NetDrvEpEnv +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import ethtool, ip +remote_ifname="" +no_sleep=False -def test_v4(cfg) -> None: +def _test_v4(cfg) -> None: cfg.require_v4() cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote) - -def test_v6(cfg) -> None: +def _test_v6(cfg) -> None: cfg.require_v6() - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) - + cmd(f"ping -c 1 -W5 {cfg.remote_v6}") + cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote) -def test_tcp(cfg) -> None: +def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, exit_wait=True) as nc: wait_port_listen(port) - cmd(f"echo 
ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", shell=True, host=cfg.remote) - ksft_eq(nc.stdout.strip(), "ping") + ksft_eq(nc.stdout.strip(), test_string) + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) - ksft_eq(nc.stdout.strip(), "ping") - + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support 
native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). + """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + except Exception as e: + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global remote_ifname + global no_sleep + + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_ifname = remote_info.rstrip('\n') + if remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link 
set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default(cfg, netnl) -> None: + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_native_sb(cfg, netnl) -> None: + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_native_mb(cfg, netnl) -> None: + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run(globs=globals(), case_pfx={"test_"}, 
args=(cfg, )) + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) + set_interface_init(cfg) ksft_exit() diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee84..e73fab3edd9f7 100644 --- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; From 77b2ab31fc65c595ca0a339f6c5b8ef3adfae5c6 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 7 Mar 2025 15:47:31 +0000 Subject: [PATCH 425/503] MAINTAINERS: sfc: remove Martin Habets Martin has left AMD and no longer works on the sfc driver. Signed-off-by: Edward Cree Link: https://patch.msgid.link/20250307154731.211368-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca11a553d4121..07b2d3c7ae5d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21502,7 +21502,6 @@ F: include/linux/slimbus.h SFC NETWORK DRIVER M: Edward Cree -M: Martin Habets L: netdev@vger.kernel.org L: linux-net-drivers@amd.com S: Maintained From 12d8f318347b1d4feac48e8ac351d3786af39599 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 7 Mar 2025 20:31:52 +0200 Subject: [PATCH 426/503] drm/dp_mst: Fix locking when skipping CSN before topology probing The handling of the MST Connection Status Notify message is skipped if the probing of the topology is still pending. 
Acquiring the drm_dp_mst_topology_mgr::probe_lock for this in drm_dp_mst_handle_up_req() is problematic: the task/work this function is called from is also responsible for handling MST down-request replies (in drm_dp_mst_handle_down_rep()). Thus drm_dp_mst_link_probe_work() - holding already probe_lock - could be blocked waiting for an MST down-request reply while drm_dp_mst_handle_up_req() is waiting for probe_lock while processing a CSN message. This leads to the probe work's down-request message timing out. A scenario similar to the above leading to a down-request timeout is handling a CSN message in drm_dp_mst_handle_conn_stat(), holding the probe_lock and sending down-request messages while a second CSN message sent by the sink subsequently is handled by drm_dp_mst_handle_up_req(). Fix the above by moving the logic to skip the CSN handling to drm_dp_mst_process_up_req(). This function is called from a work (separate from the task/work handling new up/down messages), already holding probe_lock. This solves the above timeout issue, since handling of down-request replies won't be blocked by probe_lock. 
Fixes: ddf983488c3e ("drm/dp_mst: Skip CSN if topology probing is not done yet") Cc: Wayne Lin Cc: Lyude Paul Cc: stable@vger.kernel.org # v6.6+ Reviewed-by: Wayne Lin Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250307183152.3822170-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 06c91c5b7f7c8..6d09bef671da0 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4025,6 +4025,22 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) return 0; } +static bool primary_mstb_probing_is_done(struct drm_dp_mst_topology_mgr *mgr) +{ + bool probing_done = false; + + mutex_lock(&mgr->lock); + + if (mgr->mst_primary && drm_dp_mst_topology_try_get_mstb(mgr->mst_primary)) { + probing_done = mgr->mst_primary->link_address_sent; + drm_dp_mst_topology_put_mstb(mgr->mst_primary); + } + + mutex_unlock(&mgr->lock); + + return probing_done; +} + static inline bool drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_pending_up_req *up_req) @@ -4055,8 +4071,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */ if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) { - dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); - hotplug = true; + if (!primary_mstb_probing_is_done(mgr)) { + drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. 
Skip it.\n"); + } else { + dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); + hotplug = true; + } } drm_dp_mst_topology_put_mstb(mstb); @@ -4138,10 +4158,11 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); + drm_dp_mst_topology_put_mstb(mst_primary); + if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { const struct drm_dp_connection_status_notify *conn_stat = &up_req->msg.u.conn_stat; - bool handle_csn; drm_dbg_kms(mgr->dev, "Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", conn_stat->port_number, @@ -4150,16 +4171,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) conn_stat->message_capability_status, conn_stat->input_port, conn_stat->peer_device_type); - - mutex_lock(&mgr->probe_lock); - handle_csn = mst_primary->link_address_sent; - mutex_unlock(&mgr->probe_lock); - - if (!handle_csn) { - drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it."); - kfree(up_req); - goto out_put_primary; - } } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { const struct drm_dp_resource_status_notify *res_stat = &up_req->msg.u.resource_stat; @@ -4174,9 +4185,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) list_add_tail(&up_req->next, &mgr->up_req_list); mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); - -out_put_primary: - drm_dp_mst_topology_put_mstb(mst_primary); out_clear_reply: reset_msg_rx_state(&mgr->up_req_recv); return ret; From 62531a1effa87bdab12d5104015af72e60d926ff Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 5 Mar 2025 14:15:09 +0200 Subject: [PATCH 427/503] net: switchdev: Convert blocking notification chain to a raw one A blocking notification chain uses a read-write semaphore to protect the integrity of the chain. 
The semaphore is acquired for writing when adding / removing notifiers to / from the chain and acquired for reading when traversing the chain and informing notifiers about an event. In case of the blocking switchdev notification chain, recursive notifications are possible which leads to the semaphore being acquired twice for reading and to lockdep warnings being generated [1]. Specifically, this can happen when the bridge driver processes a SWITCHDEV_BRPORT_UNOFFLOADED event which causes it to emit notifications about deferred events when calling switchdev_deferred_process(). Fix this by converting the notification chain to a raw notification chain in a similar fashion to the netdev notification chain. Protect the chain using the RTNL mutex by acquiring it when modifying the chain. Events are always informed under the RTNL mutex, but add an assertion in call_switchdev_blocking_notifiers() to make sure this is not violated in the future. Maintain the "blocking" prefix as events are always emitted from process context and listeners are allowed to block. 
[1]: WARNING: possible recursive locking detected 6.14.0-rc4-custom-g079270089484 #1 Not tainted -------------------------------------------- ip/52731 is trying to acquire lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 but task is already holding lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((switchdev_blocking_notif_chain).rwsem); lock((switchdev_blocking_notif_chain).rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by ip/52731: #0: ffffffff84f795b0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x727/0x1dc0 #1: ffffffff8731f628 (&net->rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x790/0x1dc0 #2: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 stack backtrace: ... ? __pfx_down_read+0x10/0x10 ? __pfx_mark_lock+0x10/0x10 ? __pfx_switchdev_port_attr_set_deferred+0x10/0x10 blocking_notifier_call_chain+0x58/0xa0 switchdev_port_attr_notify.constprop.0+0xb3/0x1b0 ? __pfx_switchdev_port_attr_notify.constprop.0+0x10/0x10 ? mark_held_locks+0x94/0xe0 ? switchdev_deferred_process+0x11a/0x340 switchdev_port_attr_set_deferred+0x27/0xd0 switchdev_deferred_process+0x164/0x340 br_switchdev_port_unoffload+0xc8/0x100 [bridge] br_switchdev_blocking_event+0x29f/0x580 [bridge] notifier_call_chain+0xa2/0x440 blocking_notifier_call_chain+0x6e/0xa0 switchdev_bridge_port_unoffload+0xde/0x1a0 ... 
Fixes: f7a70d650b0b6 ("net: bridge: switchdev: Ensure deferred event delivery on unoffload") Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250305121509.631207-1-amcohen@nvidia.com Signed-off-by: Paolo Abeni --- net/switchdev/switchdev.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6488ead9e4645..4d5fbacef496f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); -static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); +static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); /** * register_switchdev_notifier - Register notifier @@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); int register_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(chain, nb); + rtnl_unlock(); - return blocking_notifier_chain_register(chain, nb); + return err; } EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); int unregister_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_unregister(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_unregister(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); @@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, 
struct net_device *dev, struct switchdev_notifier_info *info, struct netlink_ext_ack *extack) { + ASSERT_RTNL(); info->dev = dev; info->extack = extack; - return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, - val, info); + return raw_notifier_call_chain(&switchdev_blocking_notif_chain, + val, info); } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); From d4234d131b0a3f9e65973f1cdc71bb3560f5d14b Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Tue, 4 Mar 2025 15:27:00 +0800 Subject: [PATCH 428/503] arm64: mm: Populate vmemmap at the page level if not section aligned On the arm64 platform with 4K base page config, SECTION_SIZE_BITS is set to 27, making one section 128M. The related page struct which vmemmap points to is 2M then. Commit c1cc1552616d ("arm64: MMU initialisation") optimizes the vmemmap to populate at the PMD section level which was suitable initially since hot plug granule is always one section(128M). However, commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") introduced a 2M(SUBSECTION_SIZE) hot plug granule, which disrupted the existing arm64 assumptions. The first problem is that if start or end is not aligned to a section boundary, such as when a subsection is hot added, populating the entire section is wasteful. The next problem is if we hotplug something that spans part of 128 MiB section (subsections, let's call it memblock1), and then hotplug something that spans another part of a 128 MiB section(subsections, let's call it memblock2), and subsequently unplug memblock1, vmemmap_free() will clear the entire PMD entry which also supports memblock2 even though memblock2 is still active. Assuming hotplug/unplug sizes are guaranteed to be symmetric. Do the fix similar to x86-64: populate to pages levels if start/end is not aligned with section boundary. 
Cc: stable@vger.kernel.org # v5.4+ Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") Acked-by: David Hildenbrand Signed-off-by: Zhenhua Huang Reviewed-by: Oscar Salvador Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250304072700.3405036-1-quic_zhenhuah@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b4df5bc5b1b8b..1dfe1a8efdbe4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1177,8 +1177,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + /* [start, end] should be within one section */ + WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page)); - if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES)) + if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) || + (end - start < PAGES_PER_SECTION * sizeof(struct page))) return vmemmap_populate_basepages(start, end, node, altmap); else return vmemmap_populate_hugepages(start, end, node, altmap); From f7edb07ad7c66eab3dce57384f33b9799d579133 Mon Sep 17 00:00:00 2001 From: Piotr Jaroszynski Date: Tue, 4 Mar 2025 00:51:27 -0800 Subject: [PATCH 429/503] Fix mmu notifiers for range-based invalidates Update the __flush_tlb_range_op macro not to modify its parameters as these are unexpected semantics. In practice, this fixes the call to mmu_notifier_arch_invalidate_secondary_tlbs() in __flush_tlb_range_nosync() to use the correct range instead of an empty range with start=end. The empty range was (un)lucky as it results in taking the invalidate-all path that doesn't cause correctness issues, but can certainly result in suboptimal perf. This has been broken since commit 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") when the call to the notifiers was added to __flush_tlb_range().
It predates the addition of the __flush_tlb_range_op() macro from commit 360839027a6e ("arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range") that made the bug hard to spot. Fixes: 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") Signed-off-by: Piotr Jaroszynski Cc: Catalin Marinas Cc: Will Deacon Cc: Robin Murphy Cc: Alistair Popple Cc: Raghavendra Rao Ananta Cc: SeongJae Park Cc: Jason Gunthorpe Cc: John Hubbard Cc: Nicolin Chen Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux.dev Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Reviewed-by: Catalin Marinas Reviewed-by: Alistair Popple Link: https://lore.kernel.org/r/20250304085127.2238030-1-pjaroszynski@nvidia.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bc94e036a26b9..8104aee4f9a08 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -396,33 +396,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ int num = 0; \ int scale = 3; \ int shift = lpa2 ? 
16 : PAGE_SHIFT; \ unsigned long addr; \ \ - while (pages > 0) { \ + while (__flush_pages > 0) { \ if (!system_supports_tlb_range() || \ - pages == 1 || \ - (lpa2 && start != ALIGN(start, SZ_64K))) { \ - addr = __TLBI_VADDR(start, asid); \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ __tlbi_user_level(op, addr, tlb_level); \ - start += stride; \ - pages -= stride >> PAGE_SHIFT; \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ continue; \ } \ \ - num = __TLBI_RANGE_NUM(pages, scale); \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ - pages -= __TLBI_RANGE_PAGES(num, scale); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ } \ scale--; \ } \ From 986a6f5eacb900ea0f6036ef724b26e76be40f65 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Mar 2025 15:25:31 -0700 Subject: [PATCH 430/503] vboxsf: Add __nonstring annotations for unterminated strings When a character array without a terminating NUL character has a static initializer, GCC 15's -Wunterminated-string-initialization will only warn if the array lacks the "nonstring" attribute[1]. Mark the arrays with __nonstring to and correctly identify the char array as "not a C string" and thereby eliminate the warning. This effectively reverts the change in 4e7487245abc ("vboxsf: fix building with GCC 15"), to add the annotation that has other uses (i.e. warning if the string is ever used with C string APIs). 
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 [1] Cc: Hans de Goede Cc: Brahmajit Das Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20250310222530.work.374-kees@kernel.org Reviewed-by: Hans de Goede Signed-off-by: Christian Brauner --- fs/vboxsf/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 1d94bb7841081..0bc96ab6580b3 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,8 +21,7 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', - '\375' }; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] __nonstring = "\000\377\376\375"; static int follow_symlinks; module_param(follow_symlinks, int, 0444); From f5d83cf0eeb90fade4d5c4d17d24b8bee9ceeecc Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 10:32:45 +0800 Subject: [PATCH 431/503] net: mctp: unshare packets when reassembling Ensure that the frag_list used for reassembly isn't shared with other packets. This avoids incorrect reassembly when packets are cloned, and prevents a memory leak due to circular references between fragments and their skb_shared_info. The upcoming MCTP-over-USB driver uses skb_clone which can trigger the problem - other MCTP drivers don't share SKBs. A kunit test is added to reproduce the issue. 
Signed-off-by: Matt Johnston Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306-matt-mctp-usb-v1-1-085502b3dd28@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 10 +++- net/mctp/test/route-test.c | 109 +++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 2 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 3f2bd65ff5e3c..4c460160914f0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - key->reasm_head = skb; - key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + /* Since we're manipulating the shared frag_list, ensure it isn't + * shared with any other SKBs. + */ + key->reasm_head = skb_unshare(skb, GFP_ATOMIC); + if (!key->reasm_head) + return -ENOMEM; + + key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list); key->last_seq = this_seq; return 0; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 17165b86ce22d..06c1897b685a8 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } +/* Input route to socket, using a fragmented message created from clones. 
+ */ +static void mctp_test_route_input_cloned_frag(struct kunit *test) +{ + /* 5 packet fragments, forming 2 complete messages */ + const struct mctp_hdr hdrs[5] = { + RX_FRAG(FL_S, 0), + RX_FRAG(0, 1), + RX_FRAG(FL_E, 2), + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 1), + }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb[5]; + struct sk_buff *rx_skb; + struct socket *sock; + size_t data_len; + u8 compare[100]; + u8 flat[100]; + size_t total; + void *p; + int rc; + + /* Arbitrary length */ + data_len = 3; + total = data_len + sizeof(struct mctp_hdr); + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + /* Create a single skb initially with concatenated packets */ + skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); + mctp_test_skb_set_dev(skb[0], dev); + memset(skb[0]->data, 0 * 0x11, skb[0]->len); + memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr)); + + /* Extract and populate packets */ + for (int i = 1; i < 5; i++) { + skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC); + KUNIT_ASSERT_TRUE(test, skb[i]); + p = skb_pull(skb[i], total); + KUNIT_ASSERT_TRUE(test, p); + skb_reset_network_header(skb[i]); + memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr)); + memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len); + } + for (int i = 0; i < 5; i++) + skb_trim(skb[i], total); + + /* SOM packets have a type byte to match the socket */ + skb[0]->data[4] = 0; + skb[3]->data[4] = 0; + + skb_dump("pkt1 ", skb[0], false); + skb_dump("pkt2 ", skb[1], false); + skb_dump("pkt3 ", skb[2], false); + skb_dump("pkt4 ", skb[3], false); + skb_dump("pkt5 ", skb[4], false); + + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + /* Take a reference so we can check refcounts at the end */ + skb_get(skb[i]); + } + + /* Feed the fragments into MCTP core */ + for (int i = 0; i < 5; i++) { + rc = mctp_route_input(&rt->rt, skb[i]); + KUNIT_EXPECT_EQ(test, rc, 0); + } + + /* Receive first 
reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Receive second reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len + 3) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Check input skb refcounts */ + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + kfree_skb(skb[i]); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), + KUNIT_CASE(mctp_test_route_input_cloned_frag), {} }; From 0c5e145a350de3b38cd5ae77a401b12c46fb7c1d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Mar 2025 02:39:22 +0000 Subject: [PATCH 432/503] bonding: fix incorrect MAC address setting to receive NS messages When validation on the backup slave is enabled, we need to validate the Neighbor Solicitation (NS) messages received on the backup slave. To receive these messages, the correct destination MAC address must be added to the slave. 
However, the target in bonding is a unicast address, which we cannot use directly. Instead, we should first convert it to a Solicited-Node Multicast Address and then derive the corresponding MAC address. Fix the incorrect MAC address setting on both slave_set_ns_maddr() and slave_set_ns_maddrs(). Since the two function names are similar, add some description for the functions. Also only use one mac_addr variable in slave_set_ns_maddr() to save some code and logic. Fixes: 8eb36164d1a6 ("bonding: add ns target multicast address to slave device") Acked-by: Jay Vosburgh Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306023923.38777-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_options.c | 55 +++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 327b6ecdc77e0..d1b095af253bd 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla slave->dev->flags & IFF_MULTICAST; } +/** + * slave_set_ns_maddrs - add/del all NS mac addresses for slave + * @bond: bond device + * @slave: slave device + * @add: add or remove all the NS mac addresses + * + * This function tries to add or delete all the NS mac addresses on the slave + * + * Note, the IPv6 NS target address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address.
+ * + * This function is called when + * * arp_validate changes + * * enslaving, releasing new slaves + */ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) { struct in6_addr *targets = bond->params.ns_targets; char slot_maddr[MAX_ADDR_LEN]; + struct in6_addr mcaddr; int i; if (!slave_can_set_ns_maddr(bond, slave)) @@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool if (ipv6_addr_any(&targets[i])) break; - if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { + addrconf_addr_solict_mult(&targets[i], &mcaddr); + if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) { if (add) dev_mc_add(slave->dev, slot_maddr); else @@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) slave_set_ns_maddrs(bond, slave, false); } +/** + * slave_set_ns_maddr - set new NS mac address for slave + * @bond: bond device + * @slave: slave device + * @target: the new IPv6 target + * @slot: the old IPv6 target in the slot + * + * This function tries to replace the old mac address to new one on the slave. + * + * Note, the target/slot IPv6 address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * An IPv6 NS target is added or removed. 
+ */ static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, struct in6_addr *target, struct in6_addr *slot) { - char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; + char mac_addr[MAX_ADDR_LEN]; + struct in6_addr mcast_addr; if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) return; - /* remove the previous maddr from slave */ + /* remove the previous mac addr from slave */ + addrconf_addr_solict_mult(slot, &mcast_addr); if (!ipv6_addr_any(slot) && - !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) - dev_mc_del(slave->dev, slot_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_del(slave->dev, mac_addr); - /* add new maddr on slave if target is set */ + /* add new mac addr on slave if target is set */ + addrconf_addr_solict_mult(target, &mcast_addr); if (!ipv6_addr_any(target) && - !ndisc_mc_map(target, target_maddr, slave->dev, 0)) - dev_mc_add(slave->dev, target_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_add(slave->dev, mac_addr); } static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, From 9318dc2357b6b8b2ea1200ab7f2d5877851b7382 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Mar 2025 02:39:23 +0000 Subject: [PATCH 433/503] selftests: bonding: fix incorrect mac address The correct mac address for NS target 2001:db8::254 is 33:33:ff:00:02:54, not 33:33:00:00:02:54. The same with client maddress. 
Fixes: 86fb6173d11e ("selftests: bonding: add ns multicast group testing") Acked-by: Jay Vosburgh Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306023923.38777-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc6069..7bc148889ca72 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,8 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { From 415f135ace7fd824cde083184a922e39156055b5 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 6 Mar 2025 15:05:10 +0800 Subject: [PATCH 434/503] rtase: Fix improper release of ring list entries in rtase_sw_reset Since rtase_init_ring, which is called within rtase_sw_reset, adds ring entries already present in the ring list back into the list, it causes the ring list to form a cycle. This results in list_for_each_entry_safe failing to find an endpoint during traversal, leading to an error. Therefore, it is necessary to remove the previously added ring_list nodes before calling rtase_init_ring. 
Fixes: 079600489960 ("rtase: Implement net_device_ops") Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306070510.18129-1-justinlai0215@realtek.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 3bd11cb56294c..2aacc1996796d 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev) static void rtase_sw_reset(struct net_device *dev) { struct rtase_private *tp = netdev_priv(dev); + struct rtase_ring *ring, *tmp; + struct rtase_int_vector *ivec; int ret; + u32 i; netif_stop_queue(dev); netif_carrier_off(dev); @@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev) rtase_tx_clear(tp); rtase_rx_clear(tp); + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + list_for_each_entry_safe(ring, tmp, &ivec->ring_list, + ring_entry) + list_del(&ring->ring_entry); + } + ret = rtase_init_ring(dev); if (ret) { netdev_err(dev, "unable to init ring\n"); From 3a04334d6282d08fbdd6201e374db17d31927ba3 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Sat, 8 Mar 2025 00:58:27 +0800 Subject: [PATCH 435/503] bcachefs: Fix b->written overflow When bset past end of btree node, we should not add sectors to b->written, which will overflow b->written. 
Reported-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Tested-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index dece27d9db04e..756736f9243d7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); - b->written += sectors; + b->written = min(b->written + sectors, btree_sectors(c)); if (blacklisted && !first) continue; From e5c2bcc0cd47321d78bb4e865d7857304139f95d Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 11 Mar 2025 19:43:58 +0900 Subject: [PATCH 436/503] nvme: move error logging from nvme_end_req() to __nvme_end_req() Before the Commit 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions"), blk_mq_add_to_batch() did not add failed passthrough requests to batch, and returned false. After the commit, blk_mq_add_to_batch() always adds passthrough requests to batch regardless of whether the request failed or not, and returns true. This affected error logging feature in the NVME driver. Before the commit, the call chain of failed passthrough request was as follows: nvme_handle_cqe() blk_mq_add_to_batch() .. false is returned, then call nvme_pci_complete_rq() nvme_pci_complete_rq() nvme_complete_rq() nvme_end_req() nvme_log_err_passthru() .. error logging __nvme_end_req() .. end of the rqeuest After the commit, the call chain is as follows: nvme_handle_cqe() blk_mq_add_to_batch() .. true is returned, then set nvme_pci_complete_batch() .. nvme_pci_complete_batch() nvme_complete_batch() nvme_complete_batch_req() __nvme_end_req() .. 
end of the request, without error logging To make the error logging feature work again for passthrough requests, move the nvme_log_err_passthru() call from nvme_end_req() to __nvme_end_req(). While at it, move nvme_log_error() call for non-passthrough requests together with nvme_log_err_passthru(). Even though the trigger commit does not affect non-passthrough requests, move it together for code simplicity. Fixes: 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250311104359.1767728-2-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f028913e2e622..8359d0aa0e44b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -431,6 +431,12 @@ static inline void nvme_end_req_zoned(struct request *req) static inline void __nvme_end_req(struct request *req) { + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + if (blk_rq_is_passthrough(req)) + nvme_log_err_passthru(req); + else + nvme_log_error(req); + } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); if (req->cmd_flags & REQ_NVME_MPATH) @@ -441,12 +447,6 @@ void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { - if (blk_rq_is_passthrough(req)) - nvme_log_err_passthru(req); - else - nvme_log_error(req); - } __nvme_end_req(req); blk_mq_end_request(req, status); } From ed92bc5264c4357d4fca292c769ea9967cd3d3b6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 10 Mar 2025 18:45:36 +0100 Subject: [PATCH 437/503] ASoC: codecs: wm0010: Fix error handling path in wm0010_spi_probe() Free some resources in the error handling path of the 
probe, as already done in the remove function. Fixes: e3523e01869d ("ASoC: wm0010: Add initial wm0010 DSP driver") Fixes: fd8b96574456 ("ASoC: wm0010: Clear IRQ as wake source and include missing header") Signed-off-by: Christophe JAILLET Reviewed-by: Charles Keepax Link: https://patch.msgid.link/5139ba1ab8c4c157ce04e56096a0f54a1683195c.1741549792.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- sound/soc/codecs/wm0010.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index edd2cb185c42c..9e67fbfc2ccaf 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -920,7 +920,7 @@ static int wm0010_spi_probe(struct spi_device *spi) if (ret) { dev_err(wm0010->dev, "Failed to set IRQ %d as wake source: %d\n", irq, ret); - return ret; + goto free_irq; } if (spi->max_speed_hz) @@ -932,9 +932,18 @@ static int wm0010_spi_probe(struct spi_device *spi) &soc_component_dev_wm0010, wm0010_dai, ARRAY_SIZE(wm0010_dai)); if (ret < 0) - return ret; + goto disable_irq_wake; return 0; + +disable_irq_wake: + irq_set_irq_wake(wm0010->irq, 0); + +free_irq: + if (wm0010->irq) + free_irq(wm0010->irq, wm0010); + + return ret; } static void wm0010_spi_remove(struct spi_device *spi) From d2b9d97e89c79c95f8b517e4fa43fd100f936acc Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 7 Mar 2025 17:49:52 +0800 Subject: [PATCH 438/503] qlcnic: fix memory leak issues in qlcnic_sriov_common.c Add qlcnic_sriov_free_vlans() in qlcnic_sriov_alloc_vlans() if any sriov_vlans fails to be allocated. Add qlcnic_sriov_free_vlans() to free the memory allocated by qlcnic_sriov_alloc_vlans() if "sriov->allowed_vlans" fails to be allocated. 
Fixes: 91b7282b613d ("qlcnic: Support VLAN id config.") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Link: https://patch.msgid.link/20250307094952.14874-1-haoxiang_li2024@163.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index f9dd50152b1e3..28d24d59efb84 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -454,8 +454,10 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, num_vlans = sriov->num_allowed_vlans; sriov->allowed_vlans = kcalloc(num_vlans, sizeof(u16), GFP_KERNEL); - if (!sriov->allowed_vlans) + if (!sriov->allowed_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } vlans = (u16 *)&cmd->rsp.arg[3]; for (i = 0; i < num_vlans; i++) @@ -2167,8 +2169,10 @@ int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); - if (!vf->sriov_vlans) + if (!vf->sriov_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } } return 0; From 58517f4df8424ec28dfe7290ccc61908eda57aae Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Tue, 11 Mar 2025 15:06:10 +0000 Subject: [PATCH 439/503] bcachefs: Initialize from_inode members for bch_io_opts When there is no inode source, all "from_inode" members in the structure bhc_io_opts should be set false. 
Fixes: 7a7c43a0c1ecf ("bcachefs: Add bch_io_opts fields for indicating whether the opts came from the inode") Reported-by: syzbot+c17ad4b4367b72a853cb@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c17ad4b4367b72a853cb Signed-off-by: Roxana Nicolescu Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 04ec05206f8cf..339b80770f1dd 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->_name##_from_inode = true; \ } else { \ opts->_name = c->opts._name; \ + opts->_name##_from_inode = false; \ } BCH_INODE_OPTS() #undef x From dbac8feb23382af1efa2e1a86049e079b6e42e12 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Mar 2025 10:39:36 -0400 Subject: [PATCH 440/503] bcachefs: Make sure trans is unlocked when submitting read IO We were still using the trans after the unlock, leading to this bug in the retry path: 00255 ------------[ cut here ]------------ 00255 kernel BUG at fs/bcachefs/btree_iter.c:3348! 
00255 Internal error: Oops - BUG: 00000000f2000800 [#1] SMP 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 86048768: no device to read from: 00255 u64s 8 type extent 4098:168192:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:8040a368 compress none ec: idx 83 block 1 ptr: 0:302:128 gen 0 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 85983232: no device to read from: 00255 u64s 8 type extent 4098:168064:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:43311336 compress none ec: idx 83 block 1 ptr: 0:302:0 gen 0 00255 Modules linked in: 00255 CPU: 5 UID: 0 PID: 304 Comm: kworker/u70:2 Not tainted 6.14.0-rc6-ktest-g526aae23d67d #16040 00255 Hardware name: linux,dummy-virt (DT) 00255 Workqueue: events_unbound bch2_rbio_retry 00255 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00255 pc : __bch2_trans_get+0x100/0x378 00255 lr : __bch2_trans_get+0xa0/0x378 00255 sp : ffffff80c865b760 00255 x29: ffffff80c865b760 x28: 0000000000000000 x27: ffffff80d76ed880 00255 x26: 0000000000000018 x25: 0000000000000000 x24: ffffff80f4ec3760 00255 x23: ffffff80f4010140 x22: 0000000000000056 x21: ffffff80f4ec0000 00255 x20: ffffff80f4ec3788 x19: ffffff80d75f8000 x18: 00000000ffffffff 00255 x17: 2065707974203820 x16: 7334367520200a3a x15: 0000000000000008 00255 x14: 0000000000000001 x13: 0000000000000100 x12: 0000000000000006 00255 x11: ffffffc080b47a40 x10: 0000000000000000 x9 : ffffffc08038dea8 00255 x8 : ffffff80d75fc018 x7 : 0000000000000000 x6 : 0000000000003788 00255 x5 : 0000000000003760 x4 : ffffff80c922de80 x3 : ffffff80f18f0000 00255 x2 : ffffff80c922de80 x1 : 0000000000000130 x0 : 0000000000000006 00255 Call trace: 00255 __bch2_trans_get+0x100/0x378 (P) 00255 bch2_read_io_err+0x98/0x260 00255 bch2_read_endio+0xb8/0x2d0 00255 __bch2_read_extent+0xce8/0xfe0 00255 __bch2_read+0x2a8/0x978 00255 bch2_rbio_retry+0x188/0x318 00255 
process_one_work+0x154/0x390 00255 worker_thread+0x20c/0x3b8 00255 kthread+0xf0/0x1b0 00255 ret_from_fork+0x10/0x20 00255 Code: 6b01001f 54ffff01 79408460 3617fec0 (d4210000) 00255 ---[ end trace 0000000000000000 ]--- 00255 Kernel panic - not syncing: Oops - BUG: Fatal exception 00255 SMP: stopping secondary CPUs 00255 Kernel Offset: disabled 00255 CPU features: 0x000,00000070,00000010,8240500b 00255 Memory Limit: none 00255 ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 8c7b2d3d779df..726da68073e2b 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -951,12 +951,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto retry_pick; } - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - bch2_trans_unlock(trans); - if (flags & BCH_READ_NODECODE) { /* * can happen if we retry, and the extent we were going to read @@ -1113,6 +1107,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, trace_and_count(c, read_split, &orig->bio); } + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + if (!(flags & BCH_READ_IN_RETRY)) + bch2_trans_unlock(trans); + else + bch2_trans_unlock_long(trans); + if (!rbio->pick.idx) { if (unlikely(!rbio->have_ioref)) { struct printbuf buf = PRINTBUF; @@ -1160,6 +1163,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, if (likely(!(flags & BCH_READ_IN_RETRY))) { return 0; } else { + bch2_trans_unlock(trans); + int ret; rbio->context = RBIO_CONTEXT_UNBOUND; From 5b1122fc4995f308b21d7cfc64ef9880ac834d20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 22:48:29 +0300 Subject: [PATCH 441/503] platform/x86/amd/pmf: 
fix cleanup in amd_pmf_init_smart_pc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few problems in this code: First, if amd_pmf_tee_init() fails then the function returns directly instead of cleaning up. We cannot simply do a "goto error;" because the amd_pmf_tee_init() cleanup calls tee_shm_free(dev->fw_shm_pool); and amd_pmf_tee_deinit() calls it as well leading to a double free. I have re-written this code to use an unwind ladder to free the allocations. Second, if amd_pmf_start_policy_engine() fails on every iteration through the loop then the code calls amd_pmf_tee_deinit() twice which is also a double free. Call amd_pmf_tee_deinit() inside the loop for each failed iteration. Also on that path the error codes are not necessarily negative kernel error codes. Set the error code to -EINVAL. There is a very subtle third bug which is that if the call to input_register_device() in amd_pmf_register_input_device() fails then we call input_unregister_device() on an input device that wasn't registered. This will lead to a reference counting underflow because of the device_del(&dev->dev) in __input_unregister_device(). It's unlikely that anyone would ever hit this bug in real life.
Fixes: 376a8c2a1443 ("platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/232231fc-6a71-495e-971b-be2a76f6db4c@stanley.mountain Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 36 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index ceaff1ebb7b93..a1e43873a07b0 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -510,18 +510,18 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_set_dram_addr(dev, true); if (ret) - goto error; + goto err_cancel_work; dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); if (IS_ERR(dev->policy_base)) { ret = PTR_ERR(dev->policy_base); - goto error; + goto err_free_dram_buf; } dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); if (!dev->policy_buf) { ret = -ENOMEM; - goto error; + goto err_free_dram_buf; } memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); @@ -531,13 +531,13 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); if (!dev->prev_data) { ret = -ENOMEM; - goto error; + goto err_free_policy; } for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - return ret; + goto err_free_prev_data; ret = amd_pmf_start_policy_engine(dev); switch (ret) { @@ -550,27 +550,41 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) status = false; break; default: - goto error; + ret = -EINVAL; + amd_pmf_tee_deinit(dev); + goto err_free_prev_data; } if (status) break; } - if (!status && !pb_side_load) - goto error; + if (!status && !pb_side_load) { + ret = -EINVAL; + goto err_free_prev_data; + } if (pb_side_load) amd_pmf_open_pb(dev, dev->dbgfs_dir); ret = 
amd_pmf_register_input_device(dev); if (ret) - goto error; + goto err_pmf_remove_pb; return 0; -error: - amd_pmf_deinit_smart_pc(dev); +err_pmf_remove_pb: + if (pb_side_load && dev->esbin) + amd_pmf_remove_pb(dev); + amd_pmf_tee_deinit(dev); +err_free_prev_data: + kfree(dev->prev_data); +err_free_policy: + kfree(dev->policy_buf); +err_free_dram_buf: + kfree(dev->buf); +err_cancel_work: + cancel_delayed_work_sync(&dev->pb_work); return ret; } From a05507cef0ee6a0af402c0d7e994115033ff746b Mon Sep 17 00:00:00 2001 From: Lukas Hetzenecker Date: Tue, 11 Mar 2025 00:28:03 +0100 Subject: [PATCH 442/503] platform/surface: aggregator_registry: Add Support for Surface Pro 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Pro 11 (Intel). Like with the Surface Pro 10 already, the node group is compatible, so it can be reused. Signed-off-by: Lukas Hetzenecker Link: https://lore.kernel.org/r/20250310232803.23691-1-lukas@hetzenecker.me Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index d4f32ad665305..a594d5fcfcfd1 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -371,7 +371,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 (Intel/x86) and 10 */ +/* Devices for Surface Pro 9, 10 and 11 (Intel/x86) */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -430,6 +430,9 @@ static const struct acpi_device_id ssam_platform_hub_acpi_match[] = { /* Surface Pro 10 */ { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 11 */ + { "MSHW0583", (unsigned 
long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, From a8045e46c508b70fe4b30cc020fd0a2b0709b2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 6 Mar 2025 13:08:27 -0800 Subject: [PATCH 443/503] drm/i915: Increase I915_PARAM_MMAP_GTT_VERSION version to indicate support for partial mmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") was the last patch of several patches fixing multiple partial mmaps. But without a bump in I915_PARAM_MMAP_GTT_VERSION there is no clean way for UMD to know if it can do multiple partial mmaps. Fixes: 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") Cc: Andi Shyti Cc: Nirmoy Das Cc: Lionel Landwerlin Signed-off-by: José Roberto de Souza Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20250306210827.171147-1-jose.souza@intel.com (cherry picked from commit bfef148f3680e6b9d28e7fca46d9520f80c5e50e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 21274aa9bdddc..c3dabb8579605 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -164,6 +164,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 4 - Support multiple fault handlers per object depending on object's * backing storage (a.k.a. MMAP_OFFSET). * + * 5 - Support multiple partial mmaps(mmap part of BO + unmap a offset, multiple + * times with different size and offset). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. 
It can cause machine @@ -191,7 +194,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 4; + return 5; } static inline struct i915_gtt_view From 5daa0c35a1f0e7a6c3b8ba9cb721e7d1ace6e619 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Wed, 8 Jan 2025 23:35:08 +0000 Subject: [PATCH 444/503] rust: Disallow BTF generation with Rust + LTO The kernel cannot currently self-parse BTF containing Rust debug information. pahole uses the language of the CU to determine whether to filter out debug information when generating the BTF. When LTO is enabled, Rust code can cross CU boundaries, resulting in Rust debug information in CUs labeled as C. This results in a system which cannot parse its own BTF. Signed-off-by: Matthew Maurer Cc: stable@vger.kernel.org Fixes: c1177979af9c ("btf, scripts: Exclude Rust CUs with pahole") Link: https://lore.kernel.org/r/20250108-rust-btf-lto-incompat-v1-1-60243ff6d820@google.com Signed-off-by: Miguel Ojeda --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index d0d021b3fa3b3..324c2886b2ea3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1973,7 +1973,7 @@ config RUST depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT - depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE + depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG depends on !CALL_PADDING || RUSTC_VERSION >= 108100 From 2e0f91aba507a3cb59f7a12fc3ea2b7d4d6675b7 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 12:03:24 -0500 Subject: [PATCH 445/503] scripts: generate_rust_analyzer: add missing macros deps The macros crate has depended on std and proc_macro since its introduction in commit 1fbde52bde73 ("rust: add `macros` crate"). 
These dependencies were omitted from commit 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") resulting in missing go-to-definition and autocomplete, and false-positive warnings emitted from rust-analyzer such as: [{ "resource": "/Users/tamird/src/linux/rust/macros/module.rs", "owner": "_generated_diagnostic_collection_name_#1", "code": { "value": "non_snake_case", "target": { "$mid": 1, "path": "/rustc/", "scheme": "https", "authority": "doc.rust-lang.org", "query": "search=non_snake_case" } }, "severity": 4, "message": "Variable `None` should have snake_case name, e.g. `none`", "source": "rust-analyzer", "startLineNumber": 123, "startColumn": 17, "endLineNumber": 123, "endColumn": 21 }] Add the missing dependencies to improve the developer experience. [ Fiona had a different approach (thanks!) at: https://lore.kernel.org/rust-for-linux/20241205115438.234221-1-me@kloenk.dev/ But Tamir and Fiona agreed to this one. - Miguel ] Fixes: 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Reviewed-by: Fiona Behrens Diagnosed-by: Chayim Refael Friedman Link: https://github.com/rust-lang/rust-analyzer/issues/17759#issuecomment-2646328275 Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-macros-core-dep-v3-1-45eb4836f218@gmail.com [ Removed `return`. Changed tag name. Added Link. Slightly reworded. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index aa8ea1a4dbe5f..b40679a90843b 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -57,14 +57,26 @@ def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=Tr crates_indexes[display_name] = len(crates) crates.append(crate) - # First, the ones in `rust/` since they are a bit special. 
- append_crate( - "core", - sysroot_src / "core" / "src" / "lib.rs", - [], - cfg=crates_cfgs.get("core", []), - is_workspace_member=False, - ) + def append_sysroot_crate( + display_name, + deps, + cfg=[], + ): + append_crate( + display_name, + sysroot_src / display_name / "src" / "lib.rs", + deps, + cfg, + is_workspace_member=False, + ) + + # NB: sysroot crates reexport items from one another so setting up our transitive dependencies + # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth + # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`. + append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", [])) + append_sysroot_crate("alloc", ["core"]) + append_sysroot_crate("std", ["alloc", "core"]) + append_sysroot_crate("proc_macro", ["core", "std"]) append_crate( "compiler_builtins", @@ -75,7 +87,7 @@ def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=Tr append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", - [], + ["std", "proc_macro"], is_proc_macro=True, ) From d1f928052439cad028438a8b8b34c1f01bc06068 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 13:04:16 -0500 Subject: [PATCH 446/503] scripts: generate_rust_analyzer: add missing include_dirs Commit 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") specified OBJTREE for the bindings crate, and `source.include_dirs` for the kernel crate, likely in an attempt to support out-of-source builds for those crates where the generated files reside in `objtree` rather than `srctree`. This was insufficient because both bits of configuration are required for each crate; the result is that rust-analyzer is unable to resolve generated files for either crate in an out-of-source build. [ Originally we were not using `OBJTREE` in the `kernel` crate, but we did pass the variable anyway, so conceptually it could have been there since then. 
Regarding `include_dirs`, it started in `kernel` before being in mainline because we included the bindings directly there (i.e. there was no `bindings` crate). However, when that crate got created, we moved the `OBJTREE` there but not the `include_dirs`. Nowadays, though, we happen to need the `include_dirs` also in the `kernel` crate for `generated_arch_static_branch_asm.rs` which was not there back then -- Tamir confirms it is indeed required for that reason. - Miguel ] Add the missing bits to improve the developer experience. Fixes: 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-bindings-include-v2-1-23dff845edc3@gmail.com [ Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index b40679a90843b..f2d6787e9c0c5 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -97,27 +97,27 @@ def append_sysroot_crate( ["core", "compiler_builtins"], ) - append_crate( - "bindings", - srctree / "rust"/ "bindings" / "lib.rs", - ["core"], - cfg=cfg, - ) - crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + def append_crate_with_generated( + display_name, + deps, + ): + append_crate( + display_name, + srctree / "rust"/ display_name / "lib.rs", + deps, + cfg=cfg, + ) + crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + crates[-1]["source"] = { + "include_dirs": [ + str(srctree / "rust" / display_name), + str(objtree / "rust") + ], + "exclude_dirs": [], + } - append_crate( - "kernel", - srctree / "rust" / "kernel" / "lib.rs", - ["core", "macros", "build_error", "bindings"], - cfg=cfg, - ) - crates[-1]["source"] = { - "include_dirs": [ - str(srctree / "rust" / "kernel"), - str(objtree / 
"rust") - ], - "exclude_dirs": [], - } + append_crate_with_generated("bindings", ["core"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings"]) def is_root_crate(build_file, target): try: From a1eb95d6b5f4cf5cc7b081e85e374d1dd98a213b Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 13:04:17 -0500 Subject: [PATCH 447/503] scripts: generate_rust_analyzer: add uapi crate Commit 4e1746656839 ("rust: uapi: Add UAPI crate") did not update rust-analyzer to include the new crate. Add the missing definition to improve the developer experience. Fixes: 4e1746656839 ("rust: uapi: Add UAPI crate") Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-bindings-include-v2-2-23dff845edc3@gmail.com [ Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f2d6787e9c0c5..adae71544cbd6 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -117,7 +117,8 @@ def append_crate_with_generated( } append_crate_with_generated("bindings", ["core"]) - append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings"]) + append_crate_with_generated("uapi", ["core"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings", "uapi"]) def is_root_crate(build_file, target): try: From 8ae227f8a7749eec92fc381dfbe213429c852278 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 11 Mar 2025 12:17:04 +0100 Subject: [PATCH 448/503] wifi: mac80211: fix MPDU length parsing for EHT 5/6 GHz The MPDU length is only configured using the EHT capabilities element on 2.4 GHz. On 5/6 GHz it is configured using the VHT or HE capabilities respectively. 
Fixes: cf0079279727 ("wifi: mac80211: parse A-MSDU len from EHT capabilities") Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250311121704.0634d31f0883.I28063e4d3ef7d296b7e8a1c303460346a30bf09c@changeid Signed-off-by: Johannes Berg --- net/mac80211/eht.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 7a3116c36df9f..fd41046e3b681 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2024 Intel Corporation + * Copyright(c) 2021-2025 Intel Corporation */ #include "ieee80211_i.h" @@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + /* + * The MPDU length bits are reserved on all but 2.4 GHz and get set via + * VHT (5 GHz) or HE (6 GHz) capabilities. + */ + if (sband->band != NL80211_BAND_2GHZ) + return; + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: From 285df995f90e3d61d97f327d34b9659d92313314 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 28 Feb 2025 15:04:20 +0100 Subject: [PATCH 449/503] i2c: omap: fix IRQ storms On the GTA04A5 writing a reset command to the gyroscope causes IRQ storms because NACK IRQs are enabled and therefore triggered but not acked. 
Sending a reset command to the gyroscope by i2cset 1 0x69 0x14 0xb6 with an additional debug print in the ISR (not the thread) itself causes [ 363.353515] i2c i2c-1: ioctl, cmd=0x720, arg=0xbe801b00 [ 363.359039] omap_i2c 48072000.i2c: addr: 0x0069, len: 2, flags: 0x0, stop: 1 [ 363.366180] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x1110) [ 363.371673] omap_i2c 48072000.i2c: IRQ (ISR = 0x0010) [ 363.376892] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) [ 363.382263] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) [ 363.387664] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) repeating till infinity [...] (0x2 = NACK, 0x100 = Bus free, which is not enabled) Apparently no other IRQ bit gets set, so this stalls. Do not ignore enabled interrupts and make sure they are acked. If the NACK IRQ is not needed, it should simply not be enabled, but according to the above log, caring about it is necessary unless the Bus free IRQ is enabled and handled. The assumption that it will always come with an ARDY IRQ, which was the idea behind ignoring it, proves wrong. It is true for simple reads from an unused address. To still avoid the i2cdetect trouble which is the reason for commit c770657bd261 ("i2c: omap: Fix standard mode false ACK readings"), avoid doing much about NACK in omap_i2c_xfer_data() which is used by both IRQ mode and polling mode, so also the false detection fix is extended to polling usage and IRQ storms are avoided. By changing this, the hardirq handler is not needed anymore to filter stuff. The mentioned gyro reset now just causes a -ETIMEDOUT instead of hanging the system. Fixes: c770657bd261 ("i2c: omap: Fix standard mode false ACK readings").
CC: stable@kernel.org Signed-off-by: Andreas Kemnade Tested-by: Nishanth Menon Reviewed-by: Aniket Limaye Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250228140420.379498-1-andreas@kemnade.info --- drivers/i2c/busses/i2c-omap.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 92faf03d64cfb..f18c3e74b0762 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1048,23 +1048,6 @@ static int omap_i2c_transmit_data(struct omap_i2c_dev *omap, u8 num_bytes, return 0; } -static irqreturn_t -omap_i2c_isr(int irq, void *dev_id) -{ - struct omap_i2c_dev *omap = dev_id; - irqreturn_t ret = IRQ_HANDLED; - u16 mask; - u16 stat; - - stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG); - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG) & ~OMAP_I2C_STAT_NACK; - - if (stat & mask) - ret = IRQ_WAKE_THREAD; - - return ret; -} - static int omap_i2c_xfer_data(struct omap_i2c_dev *omap) { u16 bits; @@ -1095,8 +1078,13 @@ static int omap_i2c_xfer_data(struct omap_i2c_dev *omap) } if (stat & OMAP_I2C_STAT_NACK) { - err |= OMAP_I2C_STAT_NACK; + omap->cmd_err |= OMAP_I2C_STAT_NACK; omap_i2c_ack_stat(omap, OMAP_I2C_STAT_NACK); + + if (!(stat & ~OMAP_I2C_STAT_NACK)) { + err = -EAGAIN; + break; + } } if (stat & OMAP_I2C_STAT_AL) { @@ -1472,7 +1460,7 @@ omap_i2c_probe(struct platform_device *pdev) IRQF_NO_SUSPEND, pdev->name, omap); else r = devm_request_threaded_irq(&pdev->dev, omap->irq, - omap_i2c_isr, omap_i2c_isr_thread, + NULL, omap_i2c_isr_thread, IRQF_NO_SUSPEND | IRQF_ONESHOT, pdev->name, omap); From 9b5463f349d019a261f1e80803447efca3126151 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 20:53:08 +0100 Subject: [PATCH 450/503] i2c: ali1535: Fix an error handling path in ali1535_probe() If i2c_add_adapter() fails, the request_region() call in ali1535_setup() must be undone by a corresponding release_region() call, 
as done in the remove function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/0daf63d7a2ce74c02e2664ba805bbfadab7d25e5.1741031571.git.christophe.jaillet@wanadoo.fr --- drivers/i2c/busses/i2c-ali1535.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index 544c94e86b896..1eac358380405 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -485,6 +485,8 @@ MODULE_DEVICE_TABLE(pci, ali1535_ids); static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (ali1535_setup(dev)) { dev_warn(&dev->dev, "ALI1535 not detected, module not inserted.\n"); @@ -496,7 +498,15 @@ static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(ali1535_adapter.name, sizeof(ali1535_adapter.name), "SMBus ALI1535 adapter at %04x", ali1535_offset); - return i2c_add_adapter(&ali1535_adapter); + ret = i2c_add_adapter(&ali1535_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(ali1535_smba, ALI1535_SMB_IOSIZE); + return ret; } static void ali1535_remove(struct pci_dev *dev) From 6e55caaf30c88209d097e575a169b1dface1ab69 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 20:58:06 +0100 Subject: [PATCH 451/503] i2c: ali15x3: Fix an error handling path in ali15x3_probe() If i2c_add_adapter() fails, the request_region() call in ali15x3_setup() must be undone by a corresponding release_region() call, as done in the remove function. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/9b2090cbcc02659f425188ea05f2e02745c4e67b.1741031878.git.christophe.jaillet@wanadoo.fr --- drivers/i2c/busses/i2c-ali15x3.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 4761c72081022..418d11266671e 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -472,6 +472,8 @@ MODULE_DEVICE_TABLE (pci, ali15x3_ids); static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (ali15x3_setup(dev)) { dev_err(&dev->dev, "ALI15X3 not detected, module not inserted.\n"); @@ -483,7 +485,15 @@ static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(ali15x3_adapter.name, sizeof(ali15x3_adapter.name), "SMBus ALI15X3 adapter at %04x", ali15x3_smba); - return i2c_add_adapter(&ali15x3_adapter); + ret = i2c_add_adapter(&ali15x3_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(ali15x3_smba, ALI15X3_SMB_IOSIZE); + return ret; } static void ali15x3_remove(struct pci_dev *dev) From 2b22459792fcb4def9f0936d64575ac11a95a58d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 21:26:54 +0100 Subject: [PATCH 452/503] i2c: sis630: Fix an error handling path in sis630_probe() If i2c_add_adapter() fails, the request_region() call in sis630_setup() must be undone by a corresponding release_region() call, as done in the remove function. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/3d607601f2c38e896b10207963c6ab499ca5c307.1741033587.git.christophe.jaillet@wanadoo.fr Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-sis630.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 3505cf29cedda..a19c3d251804d 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -509,6 +509,8 @@ MODULE_DEVICE_TABLE(pci, sis630_ids); static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (sis630_setup(dev)) { dev_err(&dev->dev, "SIS630 compatible bus not detected, " @@ -522,7 +524,15 @@ static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(sis630_adapter.name, sizeof(sis630_adapter.name), "SMBus SIS630 adapter at %04x", smbus_base + SMB_STS); - return i2c_add_adapter(&sis630_adapter); + ret = i2c_add_adapter(&sis630_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(smbus_base + SMB_STS, SIS630_SMB_IOREGION); + return ret; } static void sis630_remove(struct pci_dev *dev) From 9bce6b5f8987678b9c6c1fe433af6b5fe41feadc Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 11 Mar 2025 19:43:59 +0900 Subject: [PATCH 453/503] block: change blk_mq_add_to_batch() third argument type to bool Commit 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") modified the evaluation criteria for the third argument, 'ioerror', in the blk_mq_add_to_batch() function. Initially, the function had checked if 'ioerror' equals zero. Following the commit, it started checking for negative error values, with the presumption that such values, for instance -EIO, would be passed in. However, blk_mq_add_to_batch() callers do not pass negative error values. 
Instead, they pass status codes defined in various ways: - NVMe PCI and Apple drivers pass NVMe status code - virtio_blk driver passes the virtblk request header status byte - null_blk driver passes blk_status_t These codes are either zero or positive, therefore the revised check fails to function as intended. Specifically, with the NVMe PCI driver, this modification led to the failure of the blktests test case nvme/039. In this test scenario, errors are artificially injected to the NVMe driver, resulting in positive NVMe status codes passed to blk_mq_add_to_batch(), which unexpectedly processes the failed I/O in a batch. Hence the failure. To correct the ioerror check within blk_mq_add_to_batch(), make all callers to uniformly pass the argument as boolean. Modify the callers to check their specific status codes and pass the boolean value 'is_error'. Also describe the arguments of blK_mq_add_to_batch as kerneldoc. Fixes: 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20250311104359.1767728-3-shinichiro.kawasaki@wdc.com [axboe: fold in documentation update] Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 4 ++-- drivers/block/virtio_blk.c | 5 +++-- drivers/nvme/host/apple.c | 3 ++- drivers/nvme/host/pci.c | 5 +++-- include/linux/blk-mq.h | 16 ++++++++++++---- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d94ef37480bd4..fdc7a0b2af109 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1549,8 +1549,8 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) cmd = blk_mq_rq_to_pdu(req); cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req), blk_rq_sectors(req)); - if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, - blk_mq_end_request_batch)) + if (!blk_mq_add_to_batch(req, iob, cmd->error != BLK_STS_OK, + 
blk_mq_end_request_batch)) blk_mq_end_request(req, cmd->error); nr++; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a4af39fc7ea28..286cab5e53684 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1207,11 +1207,12 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); + u8 status = virtblk_vbr_status(vbr); found++; if (!blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), - virtblk_complete_batch)) + !blk_mq_add_to_batch(req, iob, status != VIRTIO_BLK_S_OK, + virtblk_complete_batch)) virtblk_request_done(req); } diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index a060f69558e76..8971aca41e63d 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -599,7 +599,8 @@ static inline void apple_nvme_handle_cqe(struct apple_nvme_queue *q, } if (!nvme_try_complete_req(req, cqe->status, cqe->result) && - !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, + !blk_mq_add_to_batch(req, iob, + nvme_req(req)->status != NVME_SC_SUCCESS, apple_nvme_complete_batch)) apple_nvme_complete_rq(req); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 640590b217282..75de86e235ad7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1130,8 +1130,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); if (!nvme_try_complete_req(req, cqe->status, cqe->result) && - !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, - nvme_pci_complete_batch)) + !blk_mq_add_to_batch(req, iob, + nvme_req(req)->status != NVME_SC_SUCCESS, + nvme_pci_complete_batch)) nvme_pci_complete_rq(req); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 71f4f0cc3dac6..aba9c24486aad 100644 --- a/include/linux/blk-mq.h +++ 
b/include/linux/blk-mq.h @@ -852,12 +852,20 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq) return rq->rq_flags & RQF_RESV; } -/* +/** + * blk_mq_add_to_batch() - add a request to the completion batch + * @req: The request to add to batch + * @iob: The batch to add the request + * @is_error: Specify true if the request failed with an error + * @complete: The completaion handler for the request + * * Batched completions only work when there is no I/O error and no special * ->end_io handler. + * + * Return: true when the request was added to the batch, otherwise false */ static inline bool blk_mq_add_to_batch(struct request *req, - struct io_comp_batch *iob, int ioerror, + struct io_comp_batch *iob, bool is_error, void (*complete)(struct io_comp_batch *)) { /* @@ -865,7 +873,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, * 1) No batch container * 2) Has scheduler data attached * 3) Not a passthrough request and end_io set - * 4) Not a passthrough request and an ioerror + * 4) Not a passthrough request and failed with an error */ if (!iob) return false; @@ -874,7 +882,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, if (!blk_rq_is_passthrough(req)) { if (req->end_io) return false; - if (ioerror < 0) + if (is_error) return false; } From d653bfeb07ebb3499c403404c21ac58a16531607 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sun, 9 Mar 2025 17:07:38 +0900 Subject: [PATCH 454/503] netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple in insert_tree() Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race"), `cpu` and `jiffies32` were introduced to the struct nf_conncount_tuple. The commit made nf_conncount_add() initialize `conn->cpu` and `conn->jiffies32` when allocating the struct. In contrast, count_tree() was not changed to initialize them. 
By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and traversal"), count_tree() was split and the relevant allocation code now resides in insert_tree(). Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 find_or_evict net/netfilter/nf_conncount.c:117 [inline] __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 count_tree net/netfilter/nf_conncount.c:438 [inline] nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] 
__se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4121 [inline] slab_alloc_node mm/slub.c:4164 [inline] kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 insert_tree net/netfilter/nf_conncount.c:372 [inline] count_tree net/netfilter/nf_conncount.c:450 [inline] nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 
bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") Signed-off-by: Kohei Enju Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index ebe38ed2e6f4f..913ede2f57f9a 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -377,6 +377,8 @@ insert_tree(struct net *net, conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); From c21b02fd9cbf15aed6e32c89e0fd70070281e3d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 11 Mar 2025 12:52:45 +0100 Subject: [PATCH 455/503] selftests: netfilter: skip br_netfilter queue tests if kernel is tainted These scripts fail if the kernel is tainted which leads to wrong test failure reports in CI environments when an unrelated test triggers some splat. Check taint state at start of script and SKIP if its already dodgy. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/br_netfilter.sh | 7 +++++++ .../testing/selftests/net/netfilter/br_netfilter_queue.sh | 7 +++++++ tools/testing/selftests/net/netfilter/nft_queue.sh | 1 + 3 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d83..1559ba275105e 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06f..4788641717d93 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da4..784d1b46912b0 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while 
packets queued" + dmesg ret=1 fi } From 80b78c39eb86e6b55f56363b709eb817527da5aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 10:45:53 +0300 Subject: [PATCH 456/503] ipvs: prevent integer overflow in do_ip_vs_get_ctl() The get->num_services variable is an unsigned int which is controlled by the user. The struct_size() function ensures that the size calculation does not overflow an unsigned long, however, we are saving the result to an int so the calculation can overflow. Both "len" and "get->num_services" come from the user. This check is just a sanity check to help the user and ensure they are using the API correctly. An integer overflow here is not a big deal. This has no security impact. Save the result from struct_size() type size_t to fix this integer overflow bug. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Dan Carpenter Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d13110ce1882..0633276d96bfb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_SERVICES: { struct ip_vs_get_services *get; - int size; + size_t size; get = (struct ip_vs_get_services *)arg; size = struct_size(get, entrytable, get->num_services); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } @@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_DESTS: { struct ip_vs_get_dests *get; - int size; + size_t size; get = (struct ip_vs_get_dests *)arg; size = struct_size(get, entrytable, get->num_dests); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != 
%zu\n", *len, size); ret = -EINVAL; goto out; } From b11a74ac4f545626d0dc95a8ca8c41df90532bf3 Mon Sep 17 00:00:00 2001 From: Navon John Lukose Date: Sat, 8 Mar 2025 03:03:19 +0530 Subject: [PATCH 457/503] ALSA: hda/realtek: Add mute LED quirk for HP Pavilion x360 14-dy1xxx Add a fixup to enable the mute LED on HP Pavilion x360 Convertible 14-dy1xxx with ALC295 codec. The appropriate coefficient index and bits were identified through a brute-force method, as detailed in https://bbs.archlinux.org/viewtopic.php?pid=2079504#p2079504. Signed-off-by: Navon John Lukose Link: https://patch.msgid.link/20250307213319.35507-1-navonjohnlukose@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d2a1f836dbbf7..a84857a3c2bfb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4790,6 +4790,21 @@ static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc295_fixup_hp_mute_led_coefbit11(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef.idx = 0xb; + spec->mute_led_coef.mask = 3 << 3; + spec->mute_led_coef.on = 1 << 3; + spec->mute_led_coef.off = 1 << 4; + snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); + } +} + static void alc285_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7656,6 +7671,7 @@ enum { ALC290_FIXUP_MONO_SPEAKERS_HSJACK, ALC290_FIXUP_SUBWOOFER, ALC290_FIXUP_SUBWOOFER_HSJACK, + ALC295_FIXUP_HP_MUTE_LED_COEFBIT11, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_LENOVO_XPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, @@ -9401,6 +9417,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC283_FIXUP_INT_MIC, }, + 
[ALC295_FIXUP_HP_MUTE_LED_COEFBIT11] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc295_fixup_hp_mute_led_coefbit11, + }, [ALC298_FIXUP_SAMSUNG_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_samsung_amp, @@ -10451,6 +10471,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), From d9e7c172a7f247f7ef0b151fa8c8f044b6a2a070 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 24 Feb 2025 15:40:58 +0100 Subject: [PATCH 458/503] media: rtl2832_sdr: assign vb2 lock before vb2_queue_init Commit c780d01cf1a6 ("media: vb2: vb2_core_queue_init(): sanity check lock and wait_prepare/finish") added a sanity check to ensure that if there are no wait_prepare/finish callbacks set by the driver, then the vb2_queue lock must be set, since otherwise the vb2 core cannot do correct locking. The rtl2832_sdr.c triggered this warning: it turns out that while the driver does set this lock, it sets it too late. So move it up to before the vb2_queue_init() call. 
Reported-by: Arthur Marsh Closes: https://lore.kernel.org/linux-media/20241211042355.8479-1-user@am64/ Fixes: 8fcd2795d22a ("media: rtl2832_sdr: drop vb2_ops_wait_prepare/finish") Cc: stable@vger.kernel.org Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/dvb-frontends/rtl2832_sdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c index 05254d8717db8..0357624968f1b 100644 --- a/drivers/media/dvb-frontends/rtl2832_sdr.c +++ b/drivers/media/dvb-frontends/rtl2832_sdr.c @@ -1363,6 +1363,7 @@ static int rtl2832_sdr_probe(struct platform_device *pdev) dev->vb_queue.ops = &rtl2832_sdr_vb2_ops; dev->vb_queue.mem_ops = &vb2_vmalloc_memops; dev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + dev->vb_queue.lock = &dev->vb_queue_lock; ret = vb2_queue_init(&dev->vb_queue); if (ret) { dev_err(&pdev->dev, "Could not initialize vb2 queue\n"); @@ -1421,7 +1422,6 @@ static int rtl2832_sdr_probe(struct platform_device *pdev) /* Init video_device structure */ dev->vdev = rtl2832_sdr_template; dev->vdev.queue = &dev->vb_queue; - dev->vdev.queue->lock = &dev->vb_queue_lock; video_set_drvdata(&dev->vdev, dev); /* Register the v4l2_device structure */ From 658fb7fe8e7f4014ea17a4da0e0c1d9bc319fa35 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 5 Mar 2025 18:27:32 +0100 Subject: [PATCH 459/503] ASoC: cs42l43: convert to SYSTEM_SLEEP_PM_OPS The custom suspend function causes a build warning when CONFIG_PM_SLEEP is disabled: sound/soc/codecs/cs42l43.c:2405:12: error: unused function 'cs42l43_codec_runtime_force_suspend' [-Werror,-Wunused-function] Change SET_SYSTEM_SLEEP_PM_OPS() to the newer SYSTEM_SLEEP_PM_OPS(), to avoid this. 
Fixes: 164b7dd4546b ("ASoC: cs42l43: Add jack delay debounce after suspend") Signed-off-by: Arnd Bergmann Reviewed-by: Maciej Strozek Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250305172738.3437513-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index d307b56a7f38e..ea84ac64c775e 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2417,7 +2417,7 @@ static int cs42l43_codec_runtime_force_suspend(struct device *dev) static const struct dev_pm_ops cs42l43_codec_pm_ops = { RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) + SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) }; static const struct platform_device_id cs42l43_codec_id_table[] = { From 18e0885bd2ca738407036434418a26a58394a60e Mon Sep 17 00:00:00 2001 From: Boon Khai Ng Date: Wed, 12 Mar 2025 11:05:44 +0800 Subject: [PATCH 460/503] USB: serial: ftdi_sio: add support for Altera USB Blaster 3 The Altera USB Blaster 3, available as both a cable and an on-board solution, is primarily used for programming and debugging FPGAs. It interfaces with host software such as Quartus Programmer, System Console, SignalTap, and Nios Debugger. The device utilizes either an FT2232 or FT4232 chip. Enabling the support for various configurations of the on-board USB Blaster 3 by including the appropriate VID/PID pairs, allowing it to function as a serial device via ftdi_sio. Note that this check-in does not include support for the cable solution, as it does not support UART functionality. 
The supported configurations are determined by the hardware design and include: 1) PID 0x6022, FT2232, 1 JTAG port (Port A) + Port B as UART 2) PID 0x6025, FT4232, 1 JTAG port (Port A) + Port C as UART 3) PID 0x6026, FT4232, 1 JTAG port (Port A) + Port C, D as UART 4) PID 0x6029, FT4232, 1 JTAG port (Port B) + Port C as UART 5) PID 0x602a, FT4232, 1 JTAG port (Port B) + Port C, D as UART 6) PID 0x602c, FT4232, 1 JTAG port (Port A) + Port B as UART 7) PID 0x602d, FT4232, 1 JTAG port (Port A) + Port B, C as UART 8) PID 0x602e, FT4232, 1 JTAG port (Port A) + Port B, C, D as UART These configurations allow for flexibility in how the USB Blaster 3 is used, depending on the specific needs of the hardware design. Signed-off-by: Boon Khai Ng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 14 ++++++++++++++ drivers/usb/serial/ftdi_sio_ids.h | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index e07c5e3eb18c0..9b34e23b70919 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1079,6 +1079,20 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, /* GMC devices */ { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, + /* Altera USB Blaster 3 */ + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6022_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6025_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 3) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6029_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 3) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602C_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 1) }, + { 
USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 5ee60ba2a73cd..52be47d684ea6 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1612,3 +1612,16 @@ */ #define GMC_VID 0x1cd7 #define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ + +/* + * Altera USB Blaster 3 (http://www.altera.com). + */ +#define ALTERA_VID 0x09fb +#define ALTERA_UB3_6022_PID 0x6022 +#define ALTERA_UB3_6025_PID 0x6025 +#define ALTERA_UB3_6026_PID 0x6026 +#define ALTERA_UB3_6029_PID 0x6029 +#define ALTERA_UB3_602A_PID 0x602a +#define ALTERA_UB3_602C_PID 0x602c +#define ALTERA_UB3_602D_PID 0x602d +#define ALTERA_UB3_602E_PID 0x602e From 066e053fe208a3b83ee89dc5a192146add688861 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:47 +0100 Subject: [PATCH 461/503] fsnotify: add pre-content hooks on mmap() Pre-content hooks in page faults introduce potential deadlock of HSM handler in userspace with filesystem freezing. The requirement with pre-content event is that for every accessed file range an event covering at least this range will be generated at least once before the file data is accessed. In preparation to disabling pre-content event hooks on page faults, add pre-content hooks at mmap() variants for the entire mmaped range, so HSM can fill content when user requests to map a portion of the file. Note that exec() variant also calls vm_mmap_pgoff() internally to map code sections, so pre-content hooks are also generated in this case.
Link: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Suggested-by: Josef Bacik Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-2-amir73il@gmail.com --- include/linux/fsnotify.h | 21 +++++++++++++++++++++ mm/util.c | 3 +++ 2 files changed, 24 insertions(+) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6a33288bd6a1f..83d3ac97f8262 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } +/* + * fsnotify_mmap_perm - permission hook before mmap of file range + */ +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + /* + * mmap() generates only pre-content events. + */ + if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) + return 0; + + return fsnotify_pre_content(&file->f_path, &off, len); +} + /* * fsnotify_truncate_perm - permission hook before file truncate */ @@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return 0; } +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + return 0; +} + static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { return 0; diff --git a/mm/util.c b/mm/util.c index b6b9684a14388..8c965474d329f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -569,6 +570,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); + if (!ret) + ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; From 0882ca4eecfe8b0013f339144acf886a0a0de41f Mon Sep 17 00:00:00 2001 From: Yifan Zha 
Date: Wed, 5 Mar 2025 13:14:55 +0800 Subject: [PATCH 462/503] drm/amd/amdkfd: Evict all queues even HWS remove queue failed [Why] If reset is detected and kfd need to evict working queues, HWS moving queue will be failed. Then remaining queues are not evicted and in active state. After reset done, kfd uses HWS to termination remaining activated queues but HWS is resetted. So remove queue will be failed again. [How] Keep removing all queues even if HWS returns failed. It will not affect cpsch as it checks reset_domain->sem. v2: If any queue failed, evict queue returns error. v3: Declare err inside the if-block. Reviewed-by: Felix Kuehling Signed-off-by: Yifan Zha Signed-off-by: Alex Deucher (cherry picked from commit 42c854b8fb0cce512534aa2b7141948e80c6ebb0) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d4593374e7a1e..34c2c42c0f95c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1230,11 +1230,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - retval = remove_queue_mes(dqm, q, qpd); - if (retval) { + int err; + + err = remove_queue_mes(dqm, q, qpd); + if (err) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - goto out; + retval = err; } } } From 6cc30748e17ea2a64051ceaf83a8372484e597f1 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 10 Mar 2025 18:08:05 +0100 Subject: [PATCH 463/503] drm/amdgpu: NULL-check BO's backing store when determining GFX12 PTE flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRT BOs may not have any backing store, so bo->tbo.resource will be NULL. 
Check for that before dereferencing. Fixes: 0cce5f285d9a ("drm/amdkfd: Check correct memory types for is_system variable") Reviewed-by: Christian König Signed-off-by: Natalie Vock Signed-off-by: Alex Deucher (cherry picked from commit 3e3fcd29b505cebed659311337ea03b7698767fc) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index b749f1c3f6a9a..0fb88e6d5d54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -528,8 +528,9 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); + is_system = bo->tbo.resource && + (bo->tbo.resource->mem_type == TTM_PL_TT || + bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; From 0c3057a5a04d07120b3d0ec9c79568fceb9c921e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:54 -0800 Subject: [PATCH 464/503] net_sched: Prevent creation of classes with TC_H_ROOT The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination condition when traversing up the qdisc tree to update parent backlog counters. However, if a class is created with classid TC_H_ROOT, the traversal terminates prematurely at this class instead of reaching the actual root qdisc, causing parent statistics to be incorrectly maintained. In case of DRR, this could lead to a crash as reported by Mingi Cho. Prevent the creation of any Qdisc class with classid TC_H_ROOT (0xFFFFFFFF) across all qdisc types, as suggested by Jamal. 
Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3e91cf867eb9..6c625dcd06519 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2254,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, return -EOPNOTSUPP; } + /* Prevent creation of traffic classes with classid TC_H_ROOT */ + if (clid == TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); + return -EINVAL; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) From bb7737de5f593155aabbca283f4822176f4e7d6b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:55 -0800 Subject: [PATCH 465/503] selftests/tc-testing: Add a test case for DRR class with TC_H_ROOT Integrate the reproducer from Mingi to TDC.
All test results: 1..4 ok 1 0385 - Create DRR with default setting ok 2 2375 - Delete DRR with handle ok 3 3092 - Show DRR class ok 4 4009 - Reject creation of DRR class with classid TC_H_ROOT Cc: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306232355.93864-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/drr.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json index 7126ec3485cbd..2b61d8d79bde8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json @@ -61,5 +61,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4009", + "name": "Reject creation of DRR class with classid TC_H_ROOT", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class drr ffff:ffff", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] From 3bcde88d381a336ff252d67867c186ee602e6656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Mar 2025 17:21:31 -0400 Subject: [PATCH 466/503] bcachefs: fix tiny leak in bch2_dev_add() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 
6d97d412fed98..0459c875e189b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; up_write(&c->state_lock); - return 0; +out: + printbuf_exit(&label); + printbuf_exit(&errbuf); + bch_err_fn(c, ret); + return ret; err_unlock: mutex_unlock(&c->sb_lock); @@ -1820,10 +1824,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - printbuf_exit(&label); - printbuf_exit(&errbuf); - bch_err_fn(c, ret); - return ret; + goto out; err_late: up_write(&c->state_lock); ca = NULL; From 0102fbf52b93e609fec0dab53b1fb4fe69113f5e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 11 Mar 2025 18:56:31 +0100 Subject: [PATCH 467/503] gpiolib: don't check the retval of get_direction() when registering a chip During chip registration we should neither check the return value of gc->get_direction() nor hold the SRCU lock when calling it. The former is because pin controllers may have pins set to alternate functions and return errors from their get_direction() callbacks. That's alright - we should default to the safe INPUT state and not bail-out. The latter is not needed because we haven't registered the chip yet so there's nothing to protect against dynamic removal. In fact: we currently hit a lockdep splat. Revert to calling the gc->get_direction() callback directly and *not* checking its value. 
Fixes: 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/81f890fc-6688-42f0-9756-567efc8bb97a@samsung.com/ Reviewed-by: Andy Shevchenko Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250226-retval-fixes-v2-1-c8dc57182441@linaro.org Tested-by: Gene C Link: https://lore.kernel.org/r/20250311175631.83779-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 8741600af7efb..de708d0818581 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1056,24 +1056,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, desc->gdev = gdev; - if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { - ret = gc->get_direction(gc, desc_index); - if (ret < 0) - /* - * FIXME: Bail-out here once all GPIO drivers - * are updated to not return errors in - * situations that can be considered normal - * operation. - */ - dev_warn(&gdev->dev, - "%s: get_direction failed: %d\n", - __func__, ret); - - assign_bit(FLAG_IS_OUT, &desc->flags, !ret); - } else { + /* + * We would typically want to check the return value of + * get_direction() here but we must not check the return value + * and bail-out as pin controllers can have pins configured to + * alternate functions and return -EINVAL. Also: there's no + * need to take the SRCU lock here. 
+ */ + if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) + assign_bit(FLAG_IS_OUT, &desc->flags, + !gc->get_direction(gc, desc_index)); + else assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); - } } ret = of_gpiochip_add(gc); From dcb73cbaaeb39c9fd00bf2e019f911725945e2fe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 11 Mar 2025 15:31:43 +0100 Subject: [PATCH 468/503] gpio: cdev: use raw notifier for line state events We use a notifier to implement the mechanism of informing the user-space about changes in GPIO line status. We register with the notifier when the GPIO character device file is opened and unregister when the last reference to the associated file descriptor is dropped. Since commit fcc8b637c542 ("gpiolib: switch the line state notifier to atomic") we use the atomic notifier variant. Atomic notifiers call rcu_synchronize in atomic_notifier_chain_unregister() which caused a significant performance regression in some circumstances, observed by user-space when calling close() on the GPIO device file descriptor. Replace the atomic notifier with the raw variant and provide synchronization with a read-write spinlock. 
Fixes: fcc8b637c542 ("gpiolib: switch the line state notifier to atomic") Reported-by: David Jander Closes: https://lore.kernel.org/all/20250311110034.53959031@erd003.prtnl/ Tested-by: David Jander Tested-by: Kent Gibson Link: https://lore.kernel.org/r/20250311-gpiolib-line-state-raw-notifier-v2-1-138374581e1e@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 15 +++++++++------ drivers/gpio/gpiolib.c | 8 +++++--- drivers/gpio/gpiolib.h | 5 ++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 40f76a90fd7db..107d75558b5a8 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2729,8 +2729,9 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) cdev->gdev = gpio_device_get(gdev); cdev->lineinfo_changed_nb.notifier_call = lineinfo_changed_notify; - ret = atomic_notifier_chain_register(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + ret = raw_notifier_chain_register(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); if (ret) goto out_free_bitmap; @@ -2754,8 +2755,9 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) blocking_notifier_chain_unregister(&gdev->device_notifier, &cdev->device_unregistered_nb); out_unregister_line_notifier: - atomic_notifier_chain_unregister(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + raw_notifier_chain_unregister(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); out_free_bitmap: gpio_device_put(gdev); bitmap_free(cdev->watched_lines); @@ -2779,8 +2781,9 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file) blocking_notifier_chain_unregister(&gdev->device_notifier, &cdev->device_unregistered_nb); - atomic_notifier_chain_unregister(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + 
scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + raw_notifier_chain_unregister(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); bitmap_free(cdev->watched_lines); gpio_device_put(gdev); kfree(cdev); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index de708d0818581..0c00ed2ab4315 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1025,7 +1025,8 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, } } - ATOMIC_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); + rwlock_init(&gdev->line_state_lock); + RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); ret = init_srcu_struct(&gdev->srcu); @@ -4188,8 +4189,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep); void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action) { - atomic_notifier_call_chain(&desc->gdev->line_state_notifier, - action, desc); + guard(read_lock_irqsave)(&desc->gdev->line_state_lock); + + raw_notifier_call_chain(&desc->gdev->line_state_notifier, action, desc); } /** diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 147156ec502b2..c129a03e20408 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ * @list: links gpio_device:s together for traversal * @line_state_notifier: used to notify subscribers about lines being * requested, released or reconfigured + * @line_state_lock: RW-spinlock protecting the line state notifier * @line_state_wq: used to emit line state events from a separate thread in * process context * @device_notifier: used to notify character device wait queues about the GPIO @@ -72,7 +74,8 @@ struct gpio_device { const char *label; void *data; struct list_head list; - struct atomic_notifier_head line_state_notifier; + struct raw_notifier_head line_state_notifier; + rwlock_t line_state_lock; struct workqueue_struct *line_state_wq; struct 
blocking_notifier_head device_notifier; struct srcu_struct srcu; From 6edd78af9506bb182518da7f6feebd75655d9a0e Mon Sep 17 00:00:00 2001 From: Alexey Kashavkin Date: Sun, 2 Mar 2025 00:14:36 +0300 Subject: [PATCH 469/503] netfilter: nft_exthdr: fix offset with ipv4_find_option() There is an incorrect calculation in the offset variable which causes the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the start variable is redundant. In the __ip_options_compile() function the correct offset is specified when finding the function. There is no need to add the size of the iphdr structure to the offset. Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") Signed-off-by: Alexey Kashavkin Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index b8d03364566c1..c74012c991255 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; - unsigned int start; bool found = false; __be32 info; int optlen; @@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; - start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) @@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, /* Copy the options since __ip_options_compile() modifies * the options. 
*/ - if (skb_copy_bits(skb, start, opt->__data, optlen)) + if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; @@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) - *offset = opt->srr + start; + *offset = opt->srr; break; case IPOPT_RR: if (!opt->rr) break; - *offset = opt->rr + start; + *offset = opt->rr; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; - *offset = opt->router_alert + start; + *offset = opt->router_alert; found = true; break; default: From 183185a18ff96751db52a46ccf93fff3a1f42815 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 7 Mar 2025 20:28:53 +0100 Subject: [PATCH 470/503] gre: Fix IPv6 link-local address generation. Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. 
Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart from gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Solicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead.
Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8cc10765360..8b6258819dade 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3209,16 +3209,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3529,7 +3526,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. 
+ */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From 6f50175ccad4278ed3a9394c00b797b75441bd6e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 7 Mar 2025 20:28:58 +0100 Subject: [PATCH 471/503] selftests: Add IPv6 link-local address generation tests for GRE devices. GRE devices have their special code for IPv6 link-local address generation that has been the source of several regressions in the past. Add selftest to check that all gre, ip6gre, gretap and ip6gretap get an IPv6 link-link local address in accordance with the net.ipv6.conf..addr_gen_mode sysctl. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://patch.msgid.link/2d6772af8e1da9016b2180ec3f8d9ee99f470c77.1741375285.git.gnault@redhat.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b0430..5916f3b81c39f 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 0000000000000..5b34f6e1f8314 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# 
+exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). 
+# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local 
address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 From a1e64addf3ff9257b45b78bc7d743781c3f41340 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 8 Mar 2025 01:45:59 +0100 Subject: [PATCH 472/503] net: openvswitch: remove misbehaving actions length check The actions length check is unreliable and produces different results depending on the initial length of the provided netlink attribute and the composition of the actual actions inside of it. For example, a user can add 4088 empty clone() actions without triggering -EMSGSIZE, on attempt to add 4089 such actions the operation will fail with the -EMSGSIZE verdict. However, if another 16 KB of other actions will be *appended* to the previous 4089 clone() actions, the check passes and the flow is successfully installed into the openvswitch datapath. The reason for a such a weird behavior is the way memory is allocated. 
When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(), that in turn calls nla_alloc_flow_actions() with either the actual length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The function adds the size of the sw_flow_actions structure and then the actually allocated memory is rounded up to the closest power of two. So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE, then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K. Later, while copying individual actions, we look at ksize(), which is 64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually triggered and the user can easily allocate almost 64 KB of actions. However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but the actions contain ones that require size increase while copying (such as clone() or sample()), then the limit check will be performed during the reserve_sfa_size() and the user will not be allowed to create actions that yield more than 32 KB internally. This is one part of the problem. The other part is that it's not actually possible for the userspace application to know beforehand if the particular set of actions will be rejected or not. Certain actions require more space in the internal representation, e.g. an empty clone() takes 4 bytes in the action list passed in by the user, but it takes 12 bytes in the internal representation due to an extra nested attribute, and some actions require less space in the internal representations, e.g. set(tunnel(..)) normally takes 64+ bytes in the action list provided by the user, but only needs to store a single pointer in the internal implementation, since all the data is stored in the tunnel_info structure instead. And the action size limit is applied to the internal representation, not to the action list passed by the user. 
So, it's not possible for the userspace application to predict if a certain combination of actions will be rejected or not, because it is not possible for it to calculate how much space these actions will take in the internal representation without knowing kernel internals. All that is causing random failures in ovs-vswitchd in userspace and inability to handle certain traffic patterns as a result. For example, it is reported that adding a bit more than 1100 VMs in an OpenStack setup breaks the network due to OVS not being able to handle ARP traffic anymore in some cases (it tries to install a proper datapath flow, but the kernel rejects it with -EMSGSIZE, even though the action list isn't actually that large.) Kernel behavior must be consistent and predictable in order for the userspace application to use it in a reasonable way. ovs-vswitchd has a mechanism to re-direct parts of the traffic and partially handle it in userspace if the required action list is oversized, but that doesn't work properly if we can't actually tell if the action list is oversized or not. The solution for this is to check the size of the user-provided actions instead of the internal representation. This commit just removes the check from the internal part because there is already an implicit size check imposed by the netlink protocol. The attribute can't be larger than 64 KB. Realistically, we could reduce the limit to 32 KB, but we would risk breaking some existing setups that rely on the fact that it's possible to create nearly 64 KB action lists today. The vast majority of flows in real setups are below 100-ish bytes. So removal of the limit will not change real memory consumption on the system. The absolutely worst case scenario is if someone adds a flow with 64 KB of empty clone() actions. That will yield 192 KB in the internal representation, consuming a 256 KB block of memory. However, that list of actions is not meaningful and also a no-op.
Real world very large action lists (that can occur for a rare cases of BUM traffic handling) are unlikely to contain a large number of clones and will likely have a lot of tunnel attributes making the internal representation comparable in size to the original action list. So, it should be fine to just remove the limit. Commit in the 'Fixes' tag is the first one that introduced the difference between internal representation and the user-provided action lists, but there were many more afterwards that lead to the situation we have today. Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") Signed-off-by: Ilya Maximets Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- net/openvswitch/flow_netlink.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 881ddd3696d54..95e0dd14dc1a3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return ERR_CAST(acts); @@ -3545,7 +3532,7 @@ int 
ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); From 1063ae07383c0ddc5bcce170260c143825846b03 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 8 Mar 2025 13:05:43 -0500 Subject: [PATCH 473/503] Revert "openvswitch: switch to per-action label counting in conntrack" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, ovs_ct_set_labels() is only called for confirmed conntrack entries (ct) within ovs_ct_commit(). However, if the conntrack entry does not have the labels_ext extension, attempting to allocate it in ovs_ct_get_conn_labels() for a confirmed entry triggers a warning in nf_ct_ext_add(): WARN_ON(nf_ct_is_confirmed(ct)); This happens when the conntrack entry is created externally before OVS increments net->ct.labels_used. The issue has become more likely since commit fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack"), which changed to use per-action label counting and increment net->ct.labels_used when a flow with ct action is added. Since there’s no straightforward way to fully resolve this issue at the moment, this reverts the commit to avoid breaking existing use cases. 
Fixes: fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack") Reported-by: Jianbo Liu Signed-off-by: Xin Long Acked-by: Aaron Conole Link: https://patch.msgid.link/1bdeb2f3a812bca016a225d3de714427b2cd4772.1741457143.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/openvswitch/conntrack.c | 30 ++++++++++++++++++------------ net/openvswitch/datapath.h | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3bb4810234aac..e573e92213029 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABELS) - return true; + attr == OVS_KEY_ATTR_CT_LABELS) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } return false; } @@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { - unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_conntrack_info ct_info; const char *helper = NULL; u16 family; @@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, return -ENOMEM; } - if (nf_connlabels_get(net, n_bits - 1)) { - nf_ct_tmpl_free(ct_info.ct); - OVS_NLERR(log, "Failed to set connlabel length"); - return -EOPNOTSUPP; - } - if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) @@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->ct) { if (ct_info->timeout[0]) nf_ct_destroy_timeout(ct_info->ct); - nf_connlabels_put(nf_ct_net(ct_info->ct)); nf_ct_tmpl_free(ct_info->ct); } } @@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family 
__ro_after_init = { int ovs_ct_init(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + if (nf_connlabels_get(net, n_bits - 1)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } + +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) return ovs_ct_limit_init(net, ovs_net); #else return 0; @@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net) void ovs_ct_exit(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_net *ovs_net = net_generic(net, ovs_net_id); +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) ovs_ct_limit_exit(net, ovs_net); #endif + + if (ovs_net->xt_label) + nf_connlabels_put(net); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 365b9bb7f546e..9ca6231ea6470 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -160,6 +160,9 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif + + /* Module reference for configuring conntrack. */ + bool xt_label; }; /** From 03ebae19925519cca5b314443c6082e0aeaa6321 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 11 Mar 2025 00:01:39 +0200 Subject: [PATCH 474/503] net/mlx5: DR, use the right action structs for STEv3 Some actions in ConnectX-8 (STEv3) have different structure, and they are handled separately in ste_ctx_v3. This separate handling was missing two actions: INSERT_HDR and REMOVE_HDR, which broke SWS for Linux Bridge. This patch resolves the issue by introducing dedicated callbacks for the insert and remove header functions, with version-specific implementations for each STE variant. 
Fixes: 4d617b57574f ("net/mlx5: DR, add support for ConnectX-8 steering") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741644104-97767-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../mellanox/mlx5/core/steering/sws/dr_ste.h | 4 ++ .../mlx5/core/steering/sws/dr_ste_v1.c | 52 ++++++++++--------- .../mlx5/core/steering/sws/dr_ste_v1.h | 4 ++ .../mlx5/core/steering/sws/dr_ste_v2.c | 2 + .../mlx5/core/steering/sws/dr_ste_v3.c | 42 +++++++++++++++ 5 files changed, 79 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index 5f409dc30aca8..3d5afc832fa56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -210,6 +210,10 @@ struct mlx5dr_ste_ctx { void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); + void (*set_insert_hdr)(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); + void (*set_remove_hdr)(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c index 7f83d77c43ef0..6447efbae00dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c @@ -266,10 +266,10 @@ void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 
reformat_id, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -286,9 +286,9 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -584,11 +584,11 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { @@ -597,10 +597,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -792,11 +792,11 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - 
attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -808,10 +808,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, allow_modify_hdr = true; allow_ctr = true; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -2200,6 +2200,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h index a8d9e308d3392..591c20c95a6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h @@ -156,6 +156,10 @@ void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num); void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action); +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, 
struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c index 0882dba0f64b9..d0ebaf820d42b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c @@ -69,6 +69,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c index cc60ce1d274ef..e468a9ae44e8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c @@ -79,6 +79,46 @@ static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action) dr_ste_v1_set_reparse(hw_ste_p); } +static void dr_ste_v3_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, u8 anchor, + u8 offset, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + attributes, DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void 
dr_ste_v3_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + static int dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, u32 hw_action_sz, @@ -211,6 +251,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v3 = { .set_pop_vlan = &dr_ste_v3_set_pop_vlan, .set_rx_decap = &dr_ste_v3_set_rx_decap, .set_encap_l3 = &dr_ste_v3_set_encap_l3, + .set_insert_hdr = &dr_ste_v3_set_insert_hdr, + .set_remove_hdr = &dr_ste_v3_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; From 521992337f67f71ce4436b98bc32563ddb1a5ce3 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 11 Mar 2025 00:01:40 +0200 Subject: [PATCH 475/503] net/mlx5: HWS, Rightsize bwc matcher priority The bwc layer was clamping the matcher priority from 32 bits to 16 bits. This didn't show up until a matcher was resized, since the initial native matcher was created using the correct 32 bit value. The fix also reorders fields to avoid some padding. 
Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741644104-97767-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index f9f569131ddeb..47f7ed1415535 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -24,8 +24,8 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; + u32 priority; u8 num_of_at; - u16 priority; u8 size_log; atomic_t num_of_rules; struct list_head *rules; From 32d2724db5b2361ab293427ccd5c24f4f2bcca14 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 Mar 2025 00:01:41 +0200 Subject: [PATCH 476/503] net/mlx5: Fix incorrect IRQ pool usage when releasing IRQs mlx5_irq_pool_get() is a getter for completion IRQ pool only. However, after the cited commit, mlx5_irq_pool_get() is called during ctrl IRQ release flow to retrieve the pool, resulting in the use of an incorrect IRQ pool. Hence, use the newly introduced mlx5_irq_get_pool() getter to retrieve the correct IRQ pool based on the IRQ itself. While at it, rename mlx5_irq_pool_get() to mlx5_irq_table_get_comp_irq_pool() which accurately reflects its purpose and improves code readability. 
Fixes: 0477d5168bbb ("net/mlx5: Expose SFs IRQs") Signed-off-by: Shay Drory Reviewed-by: Maher Sanalla Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/irq_affinity.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 13 ++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2b229b6226c6a..dfb079e59d858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -871,8 +871,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { + struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 1477db7f5307e..2691d88cdee1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -175,7 +175,7 @@ mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool, void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq); int cpu; cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 0881e961d8b17..586688da9940e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -10,12 +10,15 @@ struct mlx5_irq; struct cpu_rmap; +struct mlx5_irq_pool; int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev); int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); @@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); int mlx5_irq_get_irq(const struct mlx5_irq *irq); -struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, struct cpumask *used_cpus, u16 vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index d9362eabc6a1c..2c5f850c31f68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) return irq->map.index; } +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq) +{ + return irq->pool; +} + /* irq_pool API */ /* requesting an irq from a given pool according to given index */ @@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab return irq_table->sf_ctrl_pool; } -static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +static struct mlx5_irq_pool * 
+sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table) { return irq_table->sf_comp_pool; } -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = NULL; if (mlx5_core_is_sf(dev)) - pool = sf_irq_pool_get(irq_table); + pool = sf_comp_irq_pool_get(irq_table); /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index c4d377f8df308..cc064425fe160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -28,7 +28,6 @@ struct mlx5_irq_pool { struct mlx5_core_dev *dev; }; -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) { return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); @@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq); +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq); #endif /* __PCI_IRQ_H__ */ From 32966984bee1defd9f5a8f9be274d7c32f911ba1 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 Mar 2025 00:01:42 +0200 Subject: [PATCH 477/503] net/mlx5: Lag, Check shared fdb before creating MultiPort E-Switch Currently, MultiPort E-Switch is requesting to create a LAG with shared FDB without checking the LAG is supporting shared FDB. Add the check. 
Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index cea5aa314f6c5..ed2ba272946b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -951,7 +951,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; @@ -1038,7 +1038,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } if (do_bond && !__mlx5_lag_is_active(ldev)) { - bool shared_fdb = mlx5_shared_fdb_supported(ldev); + bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); roce_lag = mlx5_lag_is_roce_lag(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 01cf723669471..c2f256bb2bc20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index ffac0bd6c8952..1770297a112e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -83,7 +83,8 @@ static int enable_mpesw(struct mlx5_lag *ldev) if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || - !mlx5_lag_check_prereq(ldev)) + !mlx5_lag_check_prereq(ldev) || + !mlx5_lag_shared_fdb_supported(ldev)) return -EOPNOTSUPP; err = mlx5_mpesw_metadata_set(ldev); From 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 11 Mar 2025 00:01:43 +0200 Subject: [PATCH 478/503] net/mlx5: Bridge, fix the crash caused by LAG state check When removing LAG device from bridge, NETDEV_CHANGEUPPER event is triggered. Driver finds the lower devices (PFs) to flush all the offloaded entries. And mlx5_lag_is_shared_fdb is checked, which returns false if one of the PFs is unloaded. In such case, mlx5_esw_bridge_lag_rep_get() and its caller return NULL, instead of the alive PF, and the flush is skipped. Besides, the bridge fdb entry's lastuse is updated in mlx5 bridge event handler. But this SWITCHDEV_FDB_ADD_TO_BRIDGE event can be ignored in this case because the upper interface for bond is deleted, and the entry will never be aged because lastuse is never updated. To make things worse, as the entry is alive, mlx5 bridge workqueue keeps sending that event, which is then handled by kernel bridge notifier. It causes the following crash when accessing the passed bond netdev which is already destroyed. To fix this issue, remove such checks. LAG state is already checked in commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding bond to bridge"), but the driver still needs to skip offload if the LAG enters an invalid state after initialization.
Oops: stack segment: 0000 [#1] SMP CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core] RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge] Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7 RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297 RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0 RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8 R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60 R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __die_body+0x1a/0x60 ? die+0x38/0x60 ? do_trap+0x10b/0x120 ? do_error_trap+0x64/0xa0 ? exc_stack_segment+0x33/0x50 ? asm_exc_stack_segment+0x22/0x30 ? br_switchdev_event+0x2c/0x110 [bridge] ? sched_balance_newidle.isra.149+0x248/0x390 notifier_call_chain+0x4b/0xa0 atomic_notifier_call_chain+0x16/0x20 mlx5_esw_bridge_update+0xec/0x170 [mlx5_core] mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core] process_scheduled_works+0x81/0x390 worker_thread+0x106/0x250 ? bh_worker+0x110/0x110 kthread+0xb7/0xe0 ? kthread_park+0x80/0x80 ret_from_fork+0x2d/0x50 ? 
kthread_park+0x80/0x80 ret_from_fork_asm+0x11/0x20 Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 5d128c5b4529a..0f5d7ea8956f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) struct list_head *iter; netdev_for_each_lower_dev(dev, lower, iter) { - struct mlx5_core_dev *mdev; - struct mlx5e_priv *priv; - if (!mlx5e_eswitch_rep(lower)) continue; - priv = netdev_priv(lower); - mdev = priv->mdev; - if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + if (mlx5_esw_bridge_dev_same_esw(lower, esw)) return lower; } @@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * priv = netdev_priv(rep); mdev = priv->mdev; if (netif_is_lag_master(dev)) - return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return mlx5_lag_is_master(mdev); return true; } @@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!rep) return NOTIFY_DONE; + if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) + return NOTIFY_DONE; + switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = container_of(info, From e92df790d07a8eea873efcb84776e7b71f81c7d5 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 11 Mar 2025 00:01:44 +0200 Subject: [PATCH 479/503] net/mlx5e: Prevent bridge link show failure for 
non-eswitch-allowed devices mlx5_eswitch_get_vepa returns -EPERM if the device lacks eswitch_manager capability, blocking mlx5e_bridge_getlink from retrieving VEPA mode. Since mlx5e_bridge_getlink implements ndo_bridge_getlink, returning -EPERM causes bridge link show to fail instead of skipping devices without this capability. To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when mlx5_eswitch_get_vepa fails, ensuring the command continues processing other devices while ignoring those without the necessary capability. Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a814b63ed97e5..8fcaee381b0e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5132,11 +5132,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u8 mode, setting; - int err; - err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); - if (err) - return err; + if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) + return -EOPNOTSUPP; mode = setting ? 
BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, From 3e64bb2ae7d9f2b3a8259d4d6b86ed1984d5460a Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Tue, 11 Mar 2025 03:17:40 -0700 Subject: [PATCH 480/503] net: mana: cleanup mana struct after debugfs_remove() When on a MANA VM hibernation is triggered, as part of hibernate_snapshot(), mana_gd_suspend() and mana_gd_resume() are called. If during this mana_gd_resume(), a failure occurs with HWC creation, mana_port_debugfs pointer does not get reinitialized and ends up pointing to older, cleaned-up dentry. Further in the hibernation path, as part of power_down(), mana_gd_shutdown() is triggered. This call, unaware of the failures in resume, tries to cleanup the already cleaned up mana_port_debugfs value and hits the following bug: [ 191.359296] mana 7870:00:00.0: Shutdown was called [ 191.359918] BUG: kernel NULL pointer dereference, address: 0000000000000098 [ 191.360584] #PF: supervisor write access in kernel mode [ 191.361125] #PF: error_code(0x0002) - not-present page [ 191.361727] PGD 1080ea067 P4D 0 [ 191.362172] Oops: Oops: 0002 [#1] SMP NOPTI [ 191.362606] CPU: 11 UID: 0 PID: 1674 Comm: bash Not tainted 6.14.0-rc5+ #2 [ 191.363292] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 191.364124] RIP: 0010:down_write+0x19/0x50 [ 191.364537] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 de cd ff ff 31 c0 ba 01 00 00 00 48 0f b1 13 75 16 65 48 8b 05 88 24 4c 6a 48 89 43 08 48 8b 5d [ 191.365867] RSP: 0000:ff45fbe0c1c037b8 EFLAGS: 00010246 [ 191.366350] RAX: 0000000000000000 RBX: 0000000000000098 RCX: ffffff8100000000 [ 191.366951] RDX: 0000000000000001 RSI: 0000000000000064 RDI: 0000000000000098 [ 191.367600] RBP: ff45fbe0c1c037c0 R08: 0000000000000000 R09: 0000000000000001 [ 191.368225] R10: ff45fbe0d2b01000 R11: 0000000000000008 R12: 0000000000000000 [ 
191.368874] R13: 000000000000000b R14: ff43dc27509d67c0 R15: 0000000000000020 [ 191.369549] FS: 00007dbc5001e740(0000) GS:ff43dc663f380000(0000) knlGS:0000000000000000 [ 191.370213] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 191.370830] CR2: 0000000000000098 CR3: 0000000168e8e002 CR4: 0000000000b73ef0 [ 191.371557] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 191.372192] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 191.372906] Call Trace: [ 191.373262] [ 191.373621] ? show_regs+0x64/0x70 [ 191.374040] ? __die+0x24/0x70 [ 191.374468] ? page_fault_oops+0x290/0x5b0 [ 191.374875] ? do_user_addr_fault+0x448/0x800 [ 191.375357] ? exc_page_fault+0x7a/0x160 [ 191.375971] ? asm_exc_page_fault+0x27/0x30 [ 191.376416] ? down_write+0x19/0x50 [ 191.376832] ? down_write+0x12/0x50 [ 191.377232] simple_recursive_removal+0x4a/0x2a0 [ 191.377679] ? __pfx_remove_one+0x10/0x10 [ 191.378088] debugfs_remove+0x44/0x70 [ 191.378530] mana_detach+0x17c/0x4f0 [ 191.378950] ? __flush_work+0x1e2/0x3b0 [ 191.379362] ? __cond_resched+0x1a/0x50 [ 191.379787] mana_remove+0xf2/0x1a0 [ 191.380193] mana_gd_shutdown+0x3b/0x70 [ 191.380642] pci_device_shutdown+0x3a/0x80 [ 191.381063] device_shutdown+0x13e/0x230 [ 191.381480] kernel_power_off+0x35/0x80 [ 191.381890] hibernate+0x3c6/0x470 [ 191.382312] state_store+0xcb/0xd0 [ 191.382734] kobj_attr_store+0x12/0x30 [ 191.383211] sysfs_kf_write+0x3e/0x50 [ 191.383640] kernfs_fop_write_iter+0x140/0x1d0 [ 191.384106] vfs_write+0x271/0x440 [ 191.384521] ksys_write+0x72/0xf0 [ 191.384924] __x64_sys_write+0x19/0x20 [ 191.385313] x64_sys_call+0x2b0/0x20b0 [ 191.385736] do_syscall_64+0x79/0x150 [ 191.386146] ? __mod_memcg_lruvec_state+0xe7/0x240 [ 191.386676] ? __lruvec_stat_mod_folio+0x79/0xb0 [ 191.387124] ? __pfx_lru_add+0x10/0x10 [ 191.387515] ? queued_spin_unlock+0x9/0x10 [ 191.387937] ? do_anonymous_page+0x33c/0xa00 [ 191.388374] ? __handle_mm_fault+0xcf3/0x1210 [ 191.388805] ? 
__count_memcg_events+0xbe/0x180 [ 191.389235] ? handle_mm_fault+0xae/0x300 [ 191.389588] ? do_user_addr_fault+0x559/0x800 [ 191.390027] ? irqentry_exit_to_user_mode+0x43/0x230 [ 191.390525] ? irqentry_exit+0x1d/0x30 [ 191.390879] ? exc_page_fault+0x86/0x160 [ 191.391235] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 191.391745] RIP: 0033:0x7dbc4ff1c574 [ 191.392111] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 [ 191.393412] RSP: 002b:00007ffd95a23ab8 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 191.393990] RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007dbc4ff1c574 [ 191.394594] RDX: 0000000000000005 RSI: 00005a6eeadb0ce0 RDI: 0000000000000001 [ 191.395215] RBP: 00007ffd95a23ae0 R08: 00007dbc50003b20 R09: 0000000000000000 [ 191.395805] R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000000005 [ 191.396404] R13: 00005a6eeadb0ce0 R14: 00007dbc500045c0 R15: 00007dbc50001ee0 [ 191.396987] To fix this, we explicitly set such mana debugfs variables to NULL after debugfs_remove() is called. 
Fixes: 6607c17c6c5e ("net: mana: Enable debugfs files for MANA device") Cc: stable@vger.kernel.org Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/1741688260-28922-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 11 ++++++++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index be95336ce089a..11457b6296cc0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1547,6 +1547,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * adapter-MTU file and apc->mana_pci_debugfs folder. */ debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1569,6 +1570,8 @@ static void mana_gd_remove(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_iounmap(pdev, gc->bar0_va); vfree(gc); @@ -1622,6 +1625,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_disable_device(pdev); } @@ -1648,8 +1653,10 @@ static int __init mana_driver_init(void) mana_debugfs_root = debugfs_create_dir("mana", NULL); err = pci_register_driver(&mana_driver); - if (err) + if (err) { debugfs_remove(mana_debugfs_root); + mana_debugfs_root = NULL; + } return err; } @@ -1659,6 +1666,8 @@ static void __exit mana_driver_exit(void) pci_unregister_driver(&mana_driver); debugfs_remove(mana_debugfs_root); + + mana_debugfs_root = NULL; } module_init(mana_driver_init); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c 
index aa1e47233fe50..ae76ecc7a5d36 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -738,12 +738,11 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { /* - * at this point all dir/files under the vport directory - * are already cleaned up. - * We are sure the apc->mana_port_debugfs remove will not - * cause any freed memory access issues + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer */ debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } @@ -1254,6 +1253,7 @@ static void mana_destroy_eq(struct mana_context *ac) return; debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; @@ -1914,6 +1914,7 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; napi = &apc->tx_qp[i].tx_cq.napi; if (apc->tx_qp[i].txq.napi_initialized) { @@ -2099,6 +2100,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc, return; debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; napi = &rxq->rx_cq.napi; From 4490fe973669360efaef7350aeb9706f70164176 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Mar 2025 21:44:02 -0600 Subject: [PATCH 481/503] platform/x86/amd: pmf: Fix missing hidden options for Smart PC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amd_pmf_get_slider_info() checks the current profile to report correct value to the TA inputs. If hidden options are in use then the wrong values will be reported to TA. Add the two compat options PLATFORM_PROFILE_BALANCED_PERFORMANCE and PLATFORM_PROFILE_QUIET for this use. 
Reported-by: Yijun Shen Fixes: 9a43102daf64d ("platform/x86/amd: pmf: Add balanced-performance to hidden choices") Fixes: 44e94fece5170 ("platform/x86/amd: pmf: Add 'quiet' to hidden choices") Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250306034402.50478-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/spc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index f34f3130c3307..1d90f9382024b 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -219,12 +219,14 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ switch (dev->current_profile) { case PLATFORM_PROFILE_PERFORMANCE: + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: val = TA_BEST_PERFORMANCE; break; case PLATFORM_PROFILE_BALANCED: val = TA_BETTER_PERFORMANCE; break; case PLATFORM_PROFILE_LOW_POWER: + case PLATFORM_PROFILE_QUIET: val = TA_BEST_BATTERY; break; default: From fc99045effa81fdf509c2a97cbb7e6e8f2fd4443 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 12 Mar 2025 10:51:31 -0300 Subject: [PATCH 482/503] smb: client: fix regression with guest option When mounting a CIFS share with 'guest' mount option, mount.cifs(8) will set empty password= and password2= options. Currently we only handle empty strings from user= and password= options, so the mount will fail with cifs: Bad value for 'password2' Fix this by handling empty string from password2= option as well. 
Link: https://bbs.archlinux.org/viewtopic.php?id=303927 Reported-by: Adam Williamson Closes: https://lore.kernel.org/r/83c00b5fea81c07f6897a5dd3ef50fd3b290f56c.camel@redhat.com Fixes: 35f834265e0d ("smb3: fix broken reconnect when password changing on the server by allowing password rotation") Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index e9b286d9a7ba3..457452b4d42da 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -171,6 +171,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_string("username", Opt_user), fsparam_string("pass", Opt_pass), fsparam_string("password", Opt_pass), + fsparam_string("pass2", Opt_pass2), fsparam_string("password2", Opt_pass2), fsparam_string("ip", Opt_ip), fsparam_string("addr", Opt_ip), @@ -1131,6 +1132,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } else if (!strcmp("user", param->key) || !strcmp("username", param->key)) { skip_parsing = true; opt = Opt_user; + } else if (!strcmp("pass2", param->key) || !strcmp("password2", param->key)) { + skip_parsing = true; + opt = Opt_pass2; } } From 7489161b1852390b4413d57f2457cd40b34da6cc Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:03 +0300 Subject: [PATCH 483/503] cifs: Fix integer overflow while processing acregmax mount option User-provided mount parameter acregmax of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 5780464614f6 ("cifs: Add new parameter "acregmax" for distinct file and directory metadata timeout") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 457452b4d42da..5f3c5967a0f5a 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1344,11 +1344,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } break; case Opt_acregmax: - ctx->acregmax = HZ * result.uint_32; - if (ctx->acregmax > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "acregmax too large\n"); goto cifs_parse_mount_err; } + ctx->acregmax = HZ * result.uint_32; break; case Opt_acdirmax: ctx->acdirmax = HZ * result.uint_32; From 5b29891f91dfb8758baf1e2217bef4b16b2b165b Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:04 +0300 Subject: [PATCH 484/503] cifs: Fix integer overflow while processing acdirmax mount option User-provided mount parameter acdirmax of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 4c9f948142a5 ("cifs: Add new mount parameter "acdirmax" to allow caching directory metadata") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 5f3c5967a0f5a..c983021c406bb 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1351,11 +1351,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acregmax = HZ * result.uint_32; break; case Opt_acdirmax: - ctx->acdirmax = HZ * result.uint_32; - if (ctx->acdirmax > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "acdirmax too large\n"); goto cifs_parse_mount_err; } + ctx->acdirmax = HZ * result.uint_32; break; case Opt_actimeo: if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) { From 64f690ee22c99e16084e0e45181b2a1eed2fa149 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:05 +0300 Subject: [PATCH 485/503] cifs: Fix integer overflow while processing actimeo mount option User-provided mount parameter actimeo of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 6d20e8406f09 ("cifs: add attribute cache timeout (actimeo) tunable") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index c983021c406bb..85b062e7f48db 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1358,7 +1358,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acdirmax = HZ * result.uint_32; break; case Opt_actimeo: - if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "timeout too large\n"); goto cifs_parse_mount_err; } From d5a30fddfe2f2e540f6c43b59cf701809995faef Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:06 +0300 Subject: [PATCH 486/503] cifs: Fix integer overflow while processing closetimeo mount option User-provided mount parameter closetimeo of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 5efdd9122eff ("smb3: allow deferred close timeout to be configurable") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 85b062e7f48db..8c73d4d60d1a7 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1370,11 +1370,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acdirmax = ctx->acregmax = HZ * result.uint_32; break; case Opt_closetimeo: - ctx->closetimeo = HZ * result.uint_32; - if (ctx->closetimeo > SMB3_MAX_DCLOSETIMEO) { + if (result.uint_32 > SMB3_MAX_DCLOSETIMEO / HZ) { cifs_errorf(fc, "closetimeo too large\n"); goto cifs_parse_mount_err; } + ctx->closetimeo = HZ * result.uint_32; break; case Opt_echo_interval: ctx->echo_interval = result.uint_32; From 605b249ea96770ac4fac4b8510a99e0f8442be5e Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Tue, 11 Mar 2025 15:23:59 -0300 Subject: [PATCH 487/503] smb: client: Fix match_session bug preventing session reuse Fix a bug in match_session() that can cause the session to not be reused in some cases.
Reproduction steps: mount.cifs //server/share /mnt/a -o credentials=creds mount.cifs //server/share /mnt/b -o credentials=creds,sec=ntlmssp cat /proc/fs/cifs/DebugData | grep SessionId | wc -l mount.cifs //server/share /mnt/b -o credentials=creds,sec=ntlmssp mount.cifs //server/share /mnt/a -o credentials=creds cat /proc/fs/cifs/DebugData | grep SessionId | wc -l Cc: stable@vger.kernel.org Reviewed-by: Enzo Matsumiya Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/connect.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index f917de020dd5d..73f93a35eeddb 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1825,9 +1825,8 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx, bool match_super) { - if (ctx->sectype != Unspecified && - ctx->sectype != ses->sectype) - return 0; + struct TCP_Server_Info *server = ses->server; + enum securityEnum ctx_sec, ses_sec; if (!match_super && ctx->dfs_root_ses != ses->dfs_root_ses) return 0; @@ -1839,11 +1838,20 @@ static int match_session(struct cifs_ses *ses, if (ses->chan_max < ctx->max_channels) return 0; - switch (ses->sectype) { + ctx_sec = server->ops->select_sectype(server, ctx->sectype); + ses_sec = server->ops->select_sectype(server, ses->sectype); + + if (ctx_sec != ses_sec) + return 0; + + switch (ctx_sec) { + case IAKerb: case Kerberos: if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; break; + case NTLMv2: + case RawNTLMSSP: default: /* NULL username means anonymous session */ if (ses->user_name == NULL) { From bc71aab4513f7fecd23c051703ef5adea2230a54 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:48 +0100 Subject: [PATCH 488/503] Revert "ext4: add pre-content fsnotify hook for DAX faults" This reverts commit bb480760ffc7018e21ee6f60241c2b99ff26ee0e. 
Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-3-amir73il@gmail.com --- fs/ext4/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a5205149adba3..3bd96c3d4cd0c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -756,9 +756,6 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order) return VM_FAULT_SIGBUS; } } else { - result = filemap_fsnotify_fault(vmf); - if (unlikely(result)) - return result; filemap_invalidate_lock_shared(mapping); } result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops); From 27773ce1776279ed3220a34d2e6bfcecaee7fc66 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:49 +0100 Subject: [PATCH 489/503] Revert "xfs: add pre-content fsnotify hook for DAX faults" This reverts commit 7f4796a46571ced5d3d5b0942e1bfea1eedaaecd. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-4-amir73il@gmail.com --- fs/xfs/xfs_file.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f7a7d89c345ec..9a435b1ff2647 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1451,9 +1451,6 @@ xfs_dax_read_fault( trace_xfs_read_fault(ip, order); - ret = filemap_fsnotify_fault(vmf); - if (unlikely(ret)) - return ret; xfs_ilock(ip, XFS_MMAPLOCK_SHARED); ret = xfs_dax_fault_locked(vmf, order, false); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); @@ -1482,16 +1479,6 @@ xfs_write_fault( vm_fault_t ret; trace_xfs_write_fault(ip, order); - /* - * Usually we get here from ->page_mkwrite callback but in case of DAX - * we will get here also for ordinary write fault. Handle HSM - * notifications for that case. 
- */ - if (IS_DAX(inode)) { - ret = filemap_fsnotify_fault(vmf); - if (unlikely(ret)) - return ret; - } sb_start_pagefault(inode->i_sb); file_update_time(vmf->vma->vm_file); From 955fbe0ef19df4197595a98d0906c94025c4beef Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:50 +0100 Subject: [PATCH 490/503] Revert "fsnotify: generate pre-content permission event on page fault" This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6. In the use case of buffered write whose input buffer is mmapped file on a filesystem with a pre-content mark, the prefaulting of the buffer can happen under the filesystem freeze protection (obtained in vfs_write()) which breaks assumptions of pre-content hook and introduces potential deadlock of HSM handler in userspace with filesystem freezing. Now that we have pre-content hooks at file mmap() time, disable the pre-content event hooks on page fault to avoid the potential deadlock. Reported-by: syzbot+7229071b47908b19d5b7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Fixes: 8392bc2ff8c8 ("fsnotify: generate pre-content permission event on page fault") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-5-amir73il@gmail.com --- include/linux/mm.h | 1 - mm/filemap.c | 74 ---------------------------------------------- mm/nommu.c | 7 ----- 3 files changed, 82 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b1068ddcbb70..8483e09aeb2cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf); extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand 
stack which grows the stack according to GROWS{UP,DOWN} */ diff --git a/mm/filemap.c b/mm/filemap.c index 2974691fdfad2..ff5fcdd961364 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -3336,48 +3335,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) return ret; } -/** - * filemap_fsnotify_fault - maybe emit a pre-content event. - * @vmf: struct vm_fault containing details of the fault. - * - * If we have a pre-content watch on this file we will emit an event for this - * range. If we return anything the fault caller should return immediately, we - * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the - * fault again and then the fault handler will run the second time through. - * - * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. - */ -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - struct file *fpin = NULL; - int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; - loff_t pos = vmf->pgoff >> PAGE_SHIFT; - size_t count = PAGE_SIZE; - int err; - - /* - * We already did this and now we're retrying with everything locked, - * don't emit the event and continue. - */ - if (vmf->flags & FAULT_FLAG_TRIED) - return 0; - - /* No watches, we're done. */ - if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) - return 0; - - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - return VM_FAULT_SIGBUS; - - err = fsnotify_file_area_perm(fpin, mask, &pos, count); - fput(fpin); - if (err) - return VM_FAULT_SIGBUS; - return VM_FAULT_RETRY; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - /** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault @@ -3481,37 +3438,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * or because readahead was otherwise unable to retrieve it. 
*/ if (unlikely(!folio_test_uptodate(folio))) { - /* - * If this is a precontent file we have can now emit an event to - * try and populate the folio. - */ - if (!(vmf->flags & FAULT_FLAG_TRIED) && - unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { - loff_t pos = folio_pos(folio); - size_t count = folio_size(folio); - - /* We're NOWAIT, we have to retry. */ - if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { - folio_unlock(folio); - goto out_retry; - } - - if (mapping_locked) - filemap_invalidate_unlock_shared(mapping); - mapping_locked = false; - - folio_unlock(folio); - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - goto out_retry; - - error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, - count); - if (error) - ret = VM_FAULT_SIGBUS; - goto out_retry; - } - /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we diff --git a/mm/nommu.c b/mm/nommu.c index baa79abdaf037..9cb6e99215e2b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); From 4f4dc3a9378bc2e6bcf331ee9e65a21abf67e7e0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:51 +0100 Subject: [PATCH 491/503] Revert "mm: don't allow huge faults for files with pre content watches" This reverts commit 20bf82a898b65c129af76deb96a1b415d3098a28. 
Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-6-amir73il@gmail.com --- mm/memory.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b9661ccfa64fd..fb7b8dc751679 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -76,7 +76,6 @@ #include #include #include -#include #include @@ -5750,17 +5749,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - if (vma_is_anonymous(vma)) return do_huge_pmd_anonymous_page(vmf); - /* - * Currently we just emit PAGE_SIZE for our fault events, so don't allow - * a huge fault if we have a pre content watch on this file. This would - * be trivial to support, but there would need to be tests to ensure - * this works properly and those don't exist currently. - */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - return VM_FAULT_FALLBACK; if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PMD_ORDER); return VM_FAULT_FALLBACK; @@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) } if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { - /* See comment in create_huge_pmd. */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - goto split; if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) @@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vma)) return VM_FAULT_FALLBACK; - /* See comment in create_huge_pmd. 
*/ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - return VM_FAULT_FALLBACK; if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PUD_ORDER); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) if (vma_is_anonymous(vma)) goto split; if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { - /* See comment in create_huge_pmd. */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - goto split; if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) From 252256e416deb255607f0c4a69e7cfec079e5d61 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:52 +0100 Subject: [PATCH 492/503] Revert "fanotify: disable readahead if we have pre-content watches" This reverts commit fac84846a28c0950d4433118b3dffd44306df62d. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-7-amir73il@gmail.com --- mm/filemap.c | 12 ------------ mm/readahead.c | 14 -------------- 2 files changed, 26 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index ff5fcdd961364..6d616bb9001eb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3197,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; - /* - * If we have pre-content watches we need to disable readahead to make - * sure that we don't populate our mapping with 0 filled pages that we - * never emitted an event for. 
- */ - if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) - return fpin; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { @@ -3273,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file *fpin = NULL; unsigned int mmap_miss; - /* See comment in do_sync_mmap_readahead. */ - if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) - return fpin; - /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; diff --git a/mm/readahead.c b/mm/readahead.c index 220155a5c9646..6a4e96b69702b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -128,7 +128,6 @@ #include #include #include -#include #include "internal.h" @@ -558,15 +557,6 @@ void page_cache_sync_ra(struct readahead_control *ractl, unsigned long max_pages, contig_count; pgoff_t prev_index, miss; - /* - * If we have pre-content watches we need to disable readahead to make - * sure that we don't find 0 filled pages in cache that we never emitted - * events for. Filesystems supporting HSM must make sure to not call - * this function with ractl->file unset for files handled by HSM. - */ - if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode))) - return; - /* * Even if readahead is disabled, issue this request as readahead * as we'll need it to satisfy the requested range. The forced @@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl, if (!ra->ra_pages) return; - /* See the comment in page_cache_sync_ra. */ - if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode))) - return; - /* * Same bit is used for PG_readahead and PG_reclaim. 
*/ From 69a5a13a22b1def29dce62b5b7c86e6098c20c68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 09:56:07 -0400 Subject: [PATCH 493/503] bcachefs: target_congested -> get_random_u32_below() get_random_u32_below() has a better algorithm than bch2_rand_range(), it just didn't exist at the time. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 726da68073e2b..aa91fcf51eecc 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) } rcu_read_unlock(); - return bch2_rand_range(nr * CONGESTED_MAX) < total; + return get_random_u32_below(nr * CONGESTED_MAX) < total; } #else From 9c18ea7ffee090b47afaa7dc41903fb1b436d7bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 11:16:28 -0400 Subject: [PATCH 494/503] bcachefs: bch2_get_random_u64_below() steal the (clever) algorithm from get_random_u32_below() this fixes a bug where we were passing roundup_pow_of_two() a 64 bit number - we're squaring device latencies now: [ +1.681698] ------------[ cut here ]------------ [ +0.000010] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ +0.000011] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ +0.000011] CPU: 1 UID: 0 PID: 196 Comm: kworker/u32:13 Not tainted 6.14.0-rc6-dave+ #10 [ +0.000012] Hardware name: ASUS System Product Name/PRIME B460I-PLUS, BIOS 1301 07/13/2021 [ +0.000005] Workqueue: events_unbound __bch2_read_endio [bcachefs] [ +0.000354] Call Trace: [ +0.000005] [ +0.000007] dump_stack_lvl+0x5d/0x80 [ +0.000018] ubsan_epilogue+0x5/0x30 [ +0.000008] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ +0.000011] bch2_rand_range.cold+0x17/0x20 [bcachefs] [ +0.000231] bch2_bkey_pick_read_device+0x547/0x920 [bcachefs] [ +0.000229] __bch2_read_extent+0x1e4/0x18e0 [bcachefs] [ +0.000241] ? 
bch2_btree_iter_peek_slot+0x3df/0x800 [bcachefs] [ +0.000180] ? bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_rbio_retry+0x1fa/0x600 [bcachefs] [ +0.000224] ? bch2_printbuf_make_room+0x71/0xb0 [bcachefs] [ +0.000243] ? bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000278] bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000227] ? __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000220] __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000268] ? try_to_wake_up+0x31c/0x7f0 [ +0.000011] ? process_one_work+0x176/0x330 [ +0.000008] process_one_work+0x176/0x330 [ +0.000008] worker_thread+0x252/0x390 [ +0.000008] ? __pfx_worker_thread+0x10/0x10 [ +0.000006] kthread+0xec/0x230 [ +0.000011] ? __pfx_kthread+0x10/0x10 [ +0.000009] ret_from_fork+0x31/0x50 [ +0.000009] ? __pfx_kthread+0x10/0x10 [ +0.000008] ret_from_fork_asm+0x1a/0x30 [ +0.000012] [ +0.000046] ---[ end trace ]--- Reported-by: Roland Vet Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 2 +- fs/bcachefs/util.c | 23 ++++++++++++++--------- fs/bcachefs/util.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 05d5f71a7ca9f..2d8042f853dcd 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c, /* Pick at random, biased in favor of the faster device: */ - return bch2_rand_range(l1 + l2) > l1; + return bch2_get_random_u64_below(l1 + l2) > l1; } if (bch2_force_reconstruct_read) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index e0a876cbaa6b7..8e3ab4bf79a90 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -653,19 +653,24 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) return 0; } -size_t bch2_rand_range(size_t max) +u64 bch2_get_random_u64_below(u64 ceil) { - size_t rand; + if (ceil <= U32_MAX) + return __get_random_u32_below(ceil); - if (!max) - 
return 0; + /* this is the same (clever) algorithm as in __get_random_u32_below() */ + u64 rand = get_random_u64(); + u64 mult = ceil * rand; - do { - rand = get_random_long(); - rand &= roundup_pow_of_two(max) - 1; - } while (rand >= max); + if (unlikely(mult < ceil)) { + u64 bound = -ceil % ceil; + while (unlikely(mult < bound)) { + rand = get_random_u64(); + mult = ceil * rand; + } + } - return rand; + return mul_u64_u64_shr(ceil, rand, 64); } void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index e7c3541b38f3f..f4a4783219d9d 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -401,7 +401,7 @@ do { \ _ret; \ }) -size_t bch2_rand_range(size_t); +u64 bch2_get_random_u64_below(u64); void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); void memcpy_from_bio(void *, struct bio *, struct bvec_iter); From 57e9417f69839cb10f7ffca684c38acd28ceb57b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 8 Mar 2025 10:50:08 -0500 Subject: [PATCH 495/503] dm-flakey: Fix memory corruption in optional corrupt_bio_byte feature Fix memory corruption due to incorrect parameter being passed to bio_init Signed-off-by: Kent Overstreet Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v6.5+ Fixes: 1d9a94389853 ("dm flakey: clone pages on write bio before corrupting them") --- drivers/md/dm-flakey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 731467d4ed101..b690905ab89ff 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -426,7 +426,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b if (!clone) return NULL; - bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf); + bio_init(clone, fc->dev->bdev, clone->bi_inline_vecs, nr_iovecs, bio->bi_opf); clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); 
clone->bi_private = bio; From a2ab25529bbcea51b5e01dded79f45aeb94f644a Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Thu, 13 Mar 2025 17:31:11 +0000 Subject: [PATCH 496/503] x86/vmware: Parse MP tables for SEV-SNP enabled guests under VMware hypervisors Under VMware hypervisors, SEV-SNP enabled VMs are fundamentally able to boot without UEFI, but this regressed a year ago due to: 0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests") In this case, mpparse_find_mptable() has to be called to parse MP tables which contains the necessary boot information. [ mingo: Updated the changelog. ] Fixes: 0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests") Co-developed-by: Ye Li Signed-off-by: Ye Li Signed-off-by: Ajay Kaher Signed-off-by: Ingo Molnar Tested-by: Ye Li Reviewed-by: Kevin Loughlin Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250313173111.10918-1-ajay.kaher@broadcom.com --- arch/x86/kernel/cpu/vmware.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 00189cdeb775f..cb3f900c46fcc 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -429,6 +430,9 @@ static void __init vmware_platform_setup(void) pr_warn("Failed to get TSC freq from the hypervisor\n"); } + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !efi_enabled(EFI_BOOT)) + x86_init.mpparse.find_mptable = mpparse_find_mptable; + vmware_paravirt_ops_setup(); #ifdef CONFIG_X86_IO_APIC From f2865c6300d75a9f187dd7918d248e010970fd44 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 10 Mar 2025 19:19:07 -0700 Subject: [PATCH 497/503] usb: typec: tcpm: fix state transition for SNK_WAIT_CAPABILITIES state in run_state_machine() A subtle error got introduced while manually fixing merge conflict in tcpm.c for commit 85c4efbe6088 ("Merge v6.12-rc6 into 
usb-next"). As a result of this error, the next state is unconditionally set to SNK_WAIT_CAPABILITIES_TIMEOUT while handling SNK_WAIT_CAPABILITIES state in run_state_machine(...). Fix this by setting new state of TCPM state machine to `upcoming_state` (that is set to different values based on conditions). Cc: stable@vger.kernel.org Fixes: 85c4efbe60888 ("Merge v6.12-rc6 into usb-next") Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250310-fix-snk-wait-timeout-v6-14-rc6-v1-1-5db14475798f@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6bf1a22c785af..62ca4a0ec55bb 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5117,16 +5117,16 @@ static void run_state_machine(struct tcpm_port *port) */ if (port->vbus_never_low) { port->vbus_never_low = false; - tcpm_set_state(port, SNK_SOFT_RESET, - port->timings.sink_wait_cap_time); + upcoming_state = SNK_SOFT_RESET; } else { if (!port->self_powered) upcoming_state = SNK_WAIT_CAPABILITIES_TIMEOUT; else upcoming_state = hard_reset_state(port); - tcpm_set_state(port, SNK_WAIT_CAPABILITIES_TIMEOUT, - port->timings.sink_wait_cap_time); } + + tcpm_set_state(port, upcoming_state, + port->timings.sink_wait_cap_time); break; case SNK_WAIT_CAPABILITIES_TIMEOUT: /* From 0b4ffbe4888a2c71185eaf5c1a02dd3586a9bc04 Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Fri, 14 Mar 2025 06:53:35 +0000 Subject: [PATCH 498/503] tracing: Correct the refcount if the hist/hist_debug file fails to open The function event_{hist,hist_debug}_open() maintains the refcount of 'file->tr' and 'file' through tracing_open_file_tr(). However, it does not roll back these counts on subsequent failure paths, resulting in a refcount leak. 
A very obvious case is that if the hist/hist_debug file belongs to a specific instance, the refcount leak will prevent the deletion of that instance, as it relies on the condition 'tr->ref == 1' within __remove_instance(). Fix this by calling tracing_release_file_tr() on all failure paths in event_{hist,hist_debug}_open() to correct the refcount. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Zheng Yejian Link: https://lore.kernel.org/20250314065335.1202817-1-wutengda@huaweicloud.com Fixes: 1cc111b9cddc ("tracing: Fix uaf issue when open the hist or hist_debug file") Signed-off-by: Tengda Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ad7419e240556..53dc6719181e5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5689,12 +5689,16 @@ static int event_hist_open(struct inode *inode, struct file *file) guard(mutex)(&event_mutex); event_file = event_file_data(file); - if (!event_file) - return -ENODEV; + if (!event_file) { + ret = -ENODEV; + goto err; + } hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL); - if (!hist_file) - return -ENOMEM; + if (!hist_file) { + ret = -ENOMEM; + goto err; + } hist_file->file = file; hist_file->last_act = get_hist_hit_count(event_file); @@ -5702,9 +5706,14 @@ static int event_hist_open(struct inode *inode, struct file *file) /* Clear private_data to avoid warning in single_open() */ file->private_data = NULL; ret = single_open(file, hist_show, hist_file); - if (ret) + if (ret) { kfree(hist_file); + goto err; + } + return 0; +err: + tracing_release_file_tr(inode, file); return ret; } @@ -5979,7 +5988,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file) /* Clear private_data to avoid warning in single_open() */ file->private_data = NULL; - return 
single_open(file, hist_debug_show, file); + ret = single_open(file, hist_debug_show, file); + if (ret) + tracing_release_file_tr(inode, file); + return ret; } const struct file_operations event_hist_debug_fops = { From ca3ac4bf4dc307cea5781dccccf41c1d14c2f82f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 Mar 2025 18:02:32 +0000 Subject: [PATCH 499/503] xfs: Use abs_diff instead of XFS_ABSDIFF We have a central definition for this function since 2023, used by a number of different parts of the kernel. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Carlos Maiolino Reviewed-by: Eric Sandeen Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_alloc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3d33e17f2e5ce..7839efe050bfa 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -33,8 +33,6 @@ struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; -#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) - #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 @@ -410,8 +408,8 @@ xfs_alloc_compute_diff( if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { if (newlen1 < newlen2 || (newlen1 == newlen2 && - XFS_ABSDIFF(newbno1, wantbno) > - XFS_ABSDIFF(newbno2, wantbno))) + abs_diff(newbno1, wantbno) > + abs_diff(newbno2, wantbno))) newbno1 = newbno2; } else if (newbno2 != NULLAGBLOCK) newbno1 = newbno2; @@ -427,7 +425,7 @@ xfs_alloc_compute_diff( } else newbno1 = freeend - wantlen; *newbnop = newbno1; - return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); + return newbno1 == NULLAGBLOCK ? 
0 : abs_diff(newbno1, wantbno); } /* From 03fc0a2dc9f8c292fad8a1bcfb6d1f0dec1824be Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Fri, 14 Mar 2025 12:57:32 +0800 Subject: [PATCH 500/503] MAINTAINERS: Update Ike Panhc's email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I am no longer at Canonical, so update my entries to use my personal email address. Signed-off-by: Ike Panhc Link: https://lore.kernel.org/r/20250314045732.389973-1-ike.pan@canonical.com Signed-off-by: Ilpo Järvinen --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index ae0adc499f4ac..449c5e3a5d26b 100644 --- a/.mailmap +++ b/.mailmap @@ -281,6 +281,7 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +Ike Panhc J. Bruce Fields J. Bruce Fields Jacob Shin diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..bb4c069cc1e92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11140,7 +11140,7 @@ S: Maintained F: drivers/i2c/busses/i2c-icy.c IDEAPAD LAPTOP EXTRAS DRIVER -M: Ike Panhc +M: Ike Panhc L: platform-driver-x86@vger.kernel.org S: Maintained W: http://launchpad.net/ideapad-laptop From 90fd9ad5b0c981693c8512d9da01f14fb6596e9d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 09:54:43 -0400 Subject: [PATCH 501/503] bcachefs: Change btree wb assert to runtime error We just had a report of the assert for "btree in write buffer for non-write buffer btree" popping during the 6.14 upgrade. - 150TB filesystem, after a reboot the upgrade was able to continue from where it left off, so no major damage. But with 6.14 about to come out we want to get this tracked down asap, and need more data if other users hit this. Convert the BUG_ON() to an emergency read-only, and print out btree, the key itself, and stack trace from the original write buffer update (which did not have this check before). 
Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 8 ++++++++ fs/bcachefs/btree_write_buffer.c | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 8f22ef9a7651a..47d8690f01bfc 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct btree_trans *, + enum btree_id, struct bkey_i *); + static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { + if (unlikely(!btree_type_uses_write_buffer(btree))) { + int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); + dump_stack(); + return ret; + } /* * Most updates skip the btree write buffer until journal replay is * finished because synchronization with journal replay relies on having diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index b56c4987b8c97..2c09d19dd6210 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -264,6 +264,22 @@ static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } +int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, + enum btree_id btree, struct bkey_i *k) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "attempting to do write buffer update on non wb btree="); + bch2_btree_id_to_text(&buf, btree); + prt_str(&buf, "\n"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + bch2_fs_inconsistent(c, "%s", buf.buf); + printbuf_exit(&buf); + return -EROFS; +} + static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) { struct bch_fs *c = trans->c; 
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - BUG_ON(!btree_type_uses_write_buffer(k->btree)); + if (unlikely(!btree_type_uses_write_buffer(k->btree))) { + ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); + goto err; + } for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) prefetch(&wb->flushing.keys.data[n->idx]); From 1a2b74d0a2a46c219b25fdb0efcf9cd7f55cfe5e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 18:20:20 -0400 Subject: [PATCH 502/503] bcachefs: fix build on 32 bit in get_random_u64_below() bare 64 bit divides not allowed, whoops arm-linux-gnueabi-ld: drivers/char/random.o: in function `__get_random_u64_below': drivers/char/random.c:602:(.text+0xc70): undefined reference to `__aeabi_uldivmod' Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 8e3ab4bf79a90..da2cd11b3025d 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -663,7 +663,8 @@ u64 bch2_get_random_u64_below(u64 ceil) u64 mult = ceil * rand; if (unlikely(mult < ceil)) { - u64 bound = -ceil % ceil; + u64 bound; + div64_u64_rem(-ceil, ceil, &bound); while (unlikely(mult < bound)) { rand = get_random_u64(); mult = ceil * rand; From 4701f33a10702d5fc577c32434eb62adde0a1ae1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Mar 2025 12:55:17 -1000 Subject: [PATCH 503/503] Linux 6.14-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d6a9ec8a2ace..50694bbbf828e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION*