From d6732317a5525a95a7eb2d12b46e3e42d321c6b6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 25 Mar 2022 15:24:34 -0700 Subject: [PATCH 001/491] hwmon: (xdpe12284) Fix build warning seen if CONFIG_SENSORS_XDPE122_REGULATOR is disabled 0-day reports: drivers/hwmon/pmbus/xdpe12284.c:127:36: warning: unused variable 'xdpe122_reg_desc' This is seen if CONFIG_SENSORS_XDPE122_REGULATOR is not enabled. Mark xdpe122_reg_desc as __maybe_unused to fix the problem. Fixes: f53bfe4d6984 ("hwmon: (xdpe12284) Add regulator support") Reported-by: kernel test robot Cc: Marcello Sylvester Bauer Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/xdpe12284.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c index 18fffc5d749bd..32bc7736d609e 100644 --- a/drivers/hwmon/pmbus/xdpe12284.c +++ b/drivers/hwmon/pmbus/xdpe12284.c @@ -124,7 +124,7 @@ static int xdpe122_identify(struct i2c_client *client, return 0; } -static const struct regulator_desc xdpe122_reg_desc[] = { +static const struct regulator_desc __maybe_unused xdpe122_reg_desc[] = { PMBUS_REGULATOR("vout", 0), PMBUS_REGULATOR("vout", 1), }; From 4fd45cc8568e6086272d3036f2c29d61e9b776a1 Mon Sep 17 00:00:00 2001 From: Denis Pauk Date: Sun, 3 Apr 2022 22:34:54 +0300 Subject: [PATCH 002/491] hwmon: (asus_wmi_sensors) Fix CROSSHAIR VI HERO name CROSSHAIR VI HERO motherboard is incorrectly named as ROG CROSSHAIR VI HERO. Signed-off-by: Denis Pauk Link: https://lore.kernel.org/r/20220403193455.1363-1-pauk.denis@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_wmi_sensors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c index 8fdcb62ae52de..9e935e34c9983 100644 --- a/drivers/hwmon/asus_wmi_sensors.c +++ b/drivers/hwmon/asus_wmi_sensors.c @@ -71,7 +71,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"), - DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), From 7b2666ce445c700b8dcee994da44ddcf050a0842 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 7 Apr 2022 12:13:12 +0200 Subject: [PATCH 003/491] hwmon: (adt7470) Fix warning on module removal When removing the adt7470 module, a warning might be printed: do not call blocking ops when !TASK_RUNNING; state=1 set at [] adt7470_update_thread+0x7b/0x130 [adt7470] This happens because adt7470_update_thread() can leave the kthread in TASK_INTERRUPTIBLE state when the kthread is being stopped before the call of set_current_state(). Since kthread_exit() might sleep in exit_signals(), the warning is printed. Fix that by using schedule_timeout_interruptible() and removing the call of set_current_state(). This causes TASK_INTERRUPTIBLE to be set after kthread_should_stop() which might cause the kthread to exit. Reported-by: Zheyu Ma Fixes: 93cacfd41f82 (hwmon: (adt7470) Allow faster removal) Signed-off-by: Armin Wolf Tested-by: Zheyu Ma Link: https://lore.kernel.org/r/20220407101312.13331-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7470.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index fb6d14d213a18..c67cd037a93fd 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -294,11 +295,10 @@ static int adt7470_update_thread(void *p) adt7470_read_temperatures(data); mutex_unlock(&data->lock); - set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; - schedule_timeout(msecs_to_jiffies(data->auto_update_interval)); + schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval)); } return 0; From 6f277adf11890c09853df4417e96650f7dd5029b Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sat, 2 Apr 2022 13:00:44 +0200 Subject: [PATCH 004/491] arm64: dts: rockchip: Change io-domains of bpi-r2-pro New 1.0 Hardware revision has different io domain settings than the first board. Pre-1.0 version was not sold, so the setting can be savely overridden. Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20220402110045.104031-2-linux@fw-web.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index a01886b467eda..a0388ff85ddfa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -345,9 +345,9 @@ pmuio2-supply = <&vcc3v3_pmu>; vccio1-supply = <&vccio_acodec>; vccio3-supply = <&vccio_sd>; - vccio4-supply = <&vcc_1v8>; + vccio4-supply = <&vcc_3v3>; vccio5-supply = <&vcc_3v3>; - vccio6-supply = <&vcc_3v3>; + vccio6-supply = <&vcc_1v8>; vccio7-supply = <&vcc_3v3>; status = "okay"; }; From 77047ed73dd8b80c209bf5b5c215b858f1f5b14a Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sat, 2 Apr 2022 13:00:45 +0200 Subject: [PATCH 005/491] arm64: dts: rockchip: Add gmac1 and change network settings of bpi-r2-pro New Version (v1.0) of R2 pro has swapped gmacs compared to the v00. WAN-Port is now on gmac1 (RTL8211F) and lan-ports on gmac0 with mt7531 switch. There is already a mt7531 dsa driver in mainline, but it needs to be modified to work for this board. Pre-1.0 version was not sold, so the setting can be savely overridden. Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20220402110045.104031-3-linux@fw-web.de Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index a0388ff85ddfa..067fe4a6b178c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -16,6 +16,7 @@ aliases { ethernet0 = &gmac0; + ethernet1 = &gmac1; mmc0 = &sdmmc0; mmc1 = &sdhci; }; @@ -78,7 +79,6 @@ assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>; assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>; clock_in_out = "input"; - phy-handle = <&rgmii_phy0>; phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&gmac0_miim @@ -90,8 +90,38 @@ snps,reset-active-low; /* Reset time is 20ms, 100ms for rtl8211f */ snps,reset-delays-us = <0 20000 100000>; + tx_delay = <0x4f>; + rx_delay = <0x0f>; + status = "okay"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; +}; + +&gmac1 { + assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>; + assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru CLK_MAC1_2TOP>; + clock_in_out = "output"; + phy-handle = <&rgmii_phy1>; + phy-mode = "rgmii"; + pinctrl-names = "default"; + pinctrl-0 = <&gmac1m1_miim + &gmac1m1_tx_bus2 + &gmac1m1_rx_bus2 + &gmac1m1_rgmii_clk + &gmac1m1_rgmii_bus>; + + snps,reset-gpio = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>; + snps,reset-active-low; + /* Reset time is 20ms, 100ms for rtl8211f */ + snps,reset-delays-us = <0 20000 100000>; + tx_delay = <0x3c>; rx_delay = <0x2f>; + status = "okay"; }; @@ -315,8 +345,8 @@ status = "disabled"; }; -&mdio0 { - rgmii_phy0: ethernet-phy@0 { +&mdio1 { + rgmii_phy1: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x0>; }; From 92597f97a40bf661bebceb92e26ff87c76d562d4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Mar 2022 19:38:51 +0200 Subject: [PATCH 006/491] PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold If a Root Port on Elo i2 is put into D3cold and then back into D0, the downstream device becomes permanently inaccessible, so add a bridge D3 DMI quirk for that system. This was exposed by 14858dcc3b35 ("PCI: Use pci_update_current_state() in pci_enable_device_flags()"), but before that commit the Root Port in question had never been put into D3cold for real due to a mismatch between its power state retrieved from the PCI_PM_CTRL register (which was accessible even though the platform firmware indicated that the port was in D3cold) and the state of an ACPI power resource involved in its power management. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215715 Link: https://lore.kernel.org/r/11980172.O9o76ZdvQC@kreacher Reported-by: Stefan Gottwald Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.15+ --- drivers/pci/pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9ecce435fb3f1..d25122fbe98ab 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2920,6 +2920,16 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), }, + /* + * Downstream device is not accessible after putting a root port + * into D3cold and back into D0 on Elo i2. + */ + .ident = "Elo i2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), + DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + }, }, #endif { } From 18019eb62efb68c9b365acca9c4fcb2e0d459487 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 1 Apr 2022 21:58:14 +0300 Subject: [PATCH 007/491] arm64: dts: qcom: sm8250: don't enable rx/tx macro by default Enabling rxmacro and txmacro nodes by defaults makes Qualcomm RB5 to crash and reboot while probing audio devices. Disable these device tree nodes by default and enabled them only when necessary (for the SM8250-MTP board). Fixes: 24f52ef0c4bf ("arm64: dts: qcom: sm8250: Add nodes for tx and rx macros with soundwire masters") Cc: Srinivas Kandagatla Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220401185814.519653-1-dmitry.baryshkov@linaro.org --- arch/arm64/boot/dts/qcom/sm8250-mtp.dts | 12 ++++++++++++ arch/arm64/boot/dts/qcom/sm8250.dtsi | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts index fb99cc2827c76..7ab3627cc347d 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts @@ -622,6 +622,10 @@ status = "okay"; }; +&rxmacro { + status = "okay"; +}; + &slpi { status = "okay"; firmware-name = "qcom/sm8250/slpi.mbn"; @@ -773,6 +777,8 @@ }; &swr1 { + status = "okay"; + wcd_rx: wcd9380-rx@0,4 { compatible = "sdw20217010d00"; reg = <0 4>; @@ -781,6 +787,8 @@ }; &swr2 { + status = "okay"; + wcd_tx: wcd9380-tx@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; @@ -819,6 +827,10 @@ }; }; +&txmacro { + status = "okay"; +}; + &uart12 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index af8f226364361..1304b86af1a00 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2255,6 +2255,7 @@ pinctrl-0 = <&rx_swr_active>; compatible = "qcom,sm8250-lpass-rx-macro"; reg = <0 0x3200000 0 0x1000>; + status = "disabled"; clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, @@ -2273,6 +2274,7 @@ swr1: soundwire-controller@3210000 { reg = <0 0x3210000 0 0x2000>; compatible = "qcom,soundwire-v1.5.1"; + status = "disabled"; interrupts = ; clocks = <&rxmacro>; clock-names = "iface"; @@ -2300,6 +2302,7 @@ pinctrl-0 = <&tx_swr_active>; compatible = "qcom,sm8250-lpass-tx-macro"; reg = <0 0x3220000 0 0x1000>; + status = "disabled"; clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, @@ -2323,6 +2326,7 @@ compatible = "qcom,soundwire-v1.5.1"; interrupts-extended = <&intc GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "core"; + status = "disabled"; clocks = <&txmacro>; clock-names = "iface"; From 290c4a902b79246ec55e477fc313f27f98393dee Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Sun, 10 Apr 2022 22:06:48 -0500 Subject: [PATCH 008/491] RDMA/rxe: Fix "Replace mr by rkey in responder resources" The referenced commit generates a reference counting error if the rkey has the same index but the wrong key. In this case the reference taken by rxe_pool_get_index() is not dropped. Drop the reference if the keys don't match in rxe_recheck_mr(). Check that the mw and mr are still valid. Fixes: 8a1a0be894da ("RDMA/rxe: Replace mr by rkey in responder resources") Link: https://lore.kernel.org/r/20220411030647.20011-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 16fc7ea1298d8..1d95fab606da3 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp, * It is assumed that the access permissions if originally good * are OK and the mappings to be unchanged. * + * TODO: If someone reregisters an MR to change its size or + * access permissions during the processing of an RDMA read + * we should kill the responder resource and complete the + * operation with an error. + * * Return: mr on success else NULL */ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) @@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) if (rkey_is_mw(rkey)) { mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); - if (!mw || mw->rkey != rkey) + if (!mw) return NULL; - if (mw->state != RXE_MW_STATE_VALID) { + mr = mw->mr; + if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || + !mr || mr->state != RXE_MR_STATE_VALID) { rxe_put(mw); return NULL; } - mr = mw->mr; + rxe_get(mr); rxe_put(mw); - } else { - mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); - if (!mr || mr->rkey != rkey) - return NULL; + + return mr; } - if (mr->state != RXE_MR_STATE_VALID) { + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr) + return NULL; + + if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { rxe_put(mr); return NULL; } From efddaa397cceefb61476e383c26fafd1f8ab6356 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:26 -0700 Subject: [PATCH 009/491] ARM: dts: aspeed-g6: remove FWQSPID group in pinctrl dtsi FWSPIDQ2 and FWSPIDQ3 are not part of FWSPI18 interface so remove FWQSPID group in pinctrl dtsi. These pins must be used with the FWSPI pins that are dedicated for boot SPI interface which provides same 3.3v logic level. Fixes: 2f6edb6bcb2f ("ARM: dts: aspeed: Fix AST2600 quad spi group") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-2-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index e4775bbceecc6..06d60a8540e92 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; From 3eef2f48ba0933ba995529f522554ad5c276c39b Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:27 -0700 Subject: [PATCH 010/491] pinctrl: pinctrl-aspeed-g6: remove FWQSPID group in pinctrl FWSPIDQ2 and FWSPIDQ3 are not part of FWSPI18 interface so remove FWQSPID group in pinctrl. These pins must be used with the FWSPI pins that are dedicated for boot SPI interface which provides same 3.3v logic level. Fixes: 2eda1cdec49f ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-3-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index a3fa03bcd9a30..54064714d73fb 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1236,18 +1236,12 @@ FUNC_GROUP_DECL(SALT8, AA12); FUNC_GROUP_DECL(WDTRST4, AA12); #define AE12 196 -SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 4)); SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4); -PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2), - SIG_EXPR_LIST_PTR(AE12, GPIOY4)); +PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, GPIOY4)); #define AF12 197 -SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 5)); SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5); -PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3), - SIG_EXPR_LIST_PTR(AF12, GPIOY5)); +PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, GPIOY5)); #define AC12 198 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6)); @@ -1520,9 +1514,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3)); PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7); GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4); -GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12); GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4); -FUNC_DECL_2(FWSPID, FWSPID, FWQSPID); +FUNC_DECL_1(FWSPID, FWSPID); FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4); FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8); /* @@ -1918,7 +1911,6 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(FSI2), ASPEED_PINCTRL_GROUP(FWSPIABR), ASPEED_PINCTRL_GROUP(FWSPID), - ASPEED_PINCTRL_GROUP(FWQSPID), ASPEED_PINCTRL_GROUP(FWSPIWP), ASPEED_PINCTRL_GROUP(GPIT0), ASPEED_PINCTRL_GROUP(GPIT1), From a29c96a4053dc3c1d39353b61089882f81c6b23d Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:28 -0700 Subject: [PATCH 011/491] dt-bindings: pinctrl: aspeed-g6: remove FWQSPID group FWQSPID is not a group of FWSPID so remove it. Fixes: 7488838f2315 ("dt-bindings: pinctrl: aspeed: Document AST2600 pinmux") Signed-off-by: Jae Hyun Yoo Acked-by: Rob Herring Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-4-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- .../devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index 57b68d6c7c70d..eb6e2f2dc9eb0 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -58,7 +58,7 @@ patternProperties: $ref: "/schemas/types.yaml#/definitions/string" enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, - EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, + EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, From 6082be2a4546441d767a20e0f615109bfd334937 Mon Sep 17 00:00:00 2001 From: Johnny Huang Date: Tue, 29 Mar 2022 10:39:29 -0700 Subject: [PATCH 012/491] pinctrl: pinctrl-aspeed-g6: add FWQSPI function-group Add FWSPIDQ2 (AE12) and FWSPIDQ3 (AF12) function-group to support AST2600 FW SPI quad mode. These pins can be used with dedicated FW SPI pins - FWSPICS0# (AB14), FWSPICK (AF13), FWSPIMOSI (AC14) and FWSPIMISO (AB13). Signed-off-by: Johnny Huang Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-5-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 54064714d73fb..80838dc54b3ab 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1236,12 +1236,17 @@ FUNC_GROUP_DECL(SALT8, AA12); FUNC_GROUP_DECL(WDTRST4, AA12); #define AE12 196 +SIG_EXPR_LIST_DECL_SESG(AE12, FWSPIQ2, FWQSPI, SIG_DESC_SET(SCU438, 4)); SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4); -PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, GPIOY4)); +PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIQ2), + SIG_EXPR_LIST_PTR(AE12, GPIOY4)); #define AF12 197 +SIG_EXPR_LIST_DECL_SESG(AF12, FWSPIQ3, FWQSPI, SIG_DESC_SET(SCU438, 5)); SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5); -PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, GPIOY5)); +PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIQ3), + SIG_EXPR_LIST_PTR(AF12, GPIOY5)); +FUNC_GROUP_DECL(FWQSPI, AE12, AF12); #define AC12 198 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6)); @@ -1911,6 +1916,7 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(FSI2), ASPEED_PINCTRL_GROUP(FWSPIABR), ASPEED_PINCTRL_GROUP(FWSPID), + ASPEED_PINCTRL_GROUP(FWQSPI), ASPEED_PINCTRL_GROUP(FWSPIWP), ASPEED_PINCTRL_GROUP(GPIT0), ASPEED_PINCTRL_GROUP(GPIT1), @@ -2152,6 +2158,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = { ASPEED_PINCTRL_FUNC(FSI2), ASPEED_PINCTRL_FUNC(FWSPIABR), ASPEED_PINCTRL_FUNC(FWSPID), + ASPEED_PINCTRL_FUNC(FWQSPI), ASPEED_PINCTRL_FUNC(FWSPIWP), ASPEED_PINCTRL_FUNC(GPIT0), ASPEED_PINCTRL_FUNC(GPIT1), From 5da3ae69987a92c009263e38a0b1842fa879c7a0 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:30 -0700 Subject: [PATCH 013/491] dt-bindings: pinctrl: aspeed-g6: add FWQSPI function/group Add FWQSPI function/group to support QSPI mode on the dedicated FWSPI interface. Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220329173932.2588289-6-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- .../devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index eb6e2f2dc9eb0..3666ac5b6518d 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -33,7 +33,7 @@ patternProperties: $ref: "/schemas/types.yaml#/definitions/string" enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT, - FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, + FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, @@ -58,7 +58,7 @@ patternProperties: $ref: "/schemas/types.yaml#/definitions/string" enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, - EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, + EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, From e194aff0066ddbbea3654b742c1286d914b12492 Mon Sep 17 00:00:00 2001 From: Johnny Huang Date: Tue, 29 Mar 2022 10:39:31 -0700 Subject: [PATCH 014/491] ARM: dts: aspeed-g6: add FWQSPI group in pinctrl dtsi Add FWSPIDQ2 and FWSPIDQ3 group to support AST2600 FW SPI quad mode. These pins can be used with dedicated FW SPI pins - FWSPICS0#, FWSPICK, FWSPIMOSI and FWSPIMISO. Signed-off-by: Johnny Huang Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-7-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 06d60a8540e92..47c3fb137cbc2 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,6 +117,11 @@ groups = "FWSPID"; }; + pinctrl_fwqspi_default: fwqspi_default { + function = "FWQSPI"; + groups = "FWQSPI"; + }; + pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; From 890362d41b244536ab63591f813393f5fdf59ed7 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:32 -0700 Subject: [PATCH 015/491] ARM: dts: aspeed-g6: fix SPI1/SPI2 quad pin group Fix incorrect function mappings in pinctrl_qspi1_default and pinctrl_qspi2_default since their function should be SPI1 and SPI2 respectively. Fixes: f510f04c8c83 ("ARM: dts: aspeed: Add AST2600 pinmux nodes") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-8-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 47c3fb137cbc2..7cd4f075e3250 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -653,12 +653,12 @@ }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; From dd7c738684bdf62e8244a6d5b77440df9c085182 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Wed, 30 Mar 2022 19:24:24 -0700 Subject: [PATCH 016/491] ARM: dts: aspeed: romed8hm3: Add lm25066 sense resistor values With this property set the sensor readings from these devices can now be calibrated properly. Signed-off-by: Zev Weiss Fixes: a9a3d60b937a ("ARM: dts: aspeed: Add ASRock ROMED8HM3 BMC") Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220331022425.28606-1-zev@bewilderbeest.net Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts index e71ccfd1df631..572a43e57cac2 100644 --- a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts +++ b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts @@ -100,12 +100,14 @@ lm25066@40 { compatible = "lm25066"; reg = <0x40>; + shunt-resistor-micro-ohms = <1000>; }; /* 12VSB PMIC */ lm25066@41 { compatible = "lm25066"; reg = <0x41>; + shunt-resistor-micro-ohms = <10000>; }; }; From badcffaf87b16d53c4f40ef31cfde9ab71566bab Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Wed, 30 Mar 2022 19:24:25 -0700 Subject: [PATCH 017/491] ARM: dts: aspeed: romed8hm3: Fix GPIOB0 name This GPIO was mislabeled as DDR_MEM_TEMP in the schematic; after a correction from ASRock Rack its name now reflects its actual functionality (POST_COMPLETE_N). Signed-off-by: Zev Weiss Fixes: a9a3d60b937a ("ARM: dts: aspeed: Add ASRock ROMED8HM3 BMC") Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220331022425.28606-2-zev@bewilderbeest.net Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts index 572a43e57cac2..ff4c07c69af1c 100644 --- a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts +++ b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts @@ -198,7 +198,7 @@ gpio-line-names = /* A */ "LOCATORLED_STATUS_N", "BMC_MAC2_INTB", "NMI_BTN_N", "BMC_NMI", "", "", "", "", - /* B */ "DDR_MEM_TEMP", "", "", "", "", "", "", "", + /* B */ "POST_COMPLETE_N", "", "", "", "", "", "", "", /* C */ "", "", "", "", "PCIE_HP_SEL_N", "PCIE_SATA_SEL_N", "LOCATORBTN", "", /* D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON", "", "", "", "PSU_FAN_FAIL_N", From 32e62d1beab70d485980013312e747a25c4e13f7 Mon Sep 17 00:00:00 2001 From: Howard Chiu Date: Tue, 29 Mar 2022 03:23:51 +0000 Subject: [PATCH 018/491] ARM: dts: aspeed: Add video engine to g6 This node was accidentally removed by commit 645afe73f951 ("ARM: dts: aspeed: ast2600: Update XDMA engine node"). Fixes: 645afe73f951 ("ARM: dts: aspeed: ast2600: Update XDMA engine node") Signed-off-by: Howard Chiu Link: https://lore.kernel.org/r/SG2PR06MB2315C57600A0132FEF40F21EE61E9@SG2PR06MB2315.apcprd06.prod.outlook.com Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-g6.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 3d5ce9da42c3c..9d2a0ce4ca061 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -389,6 +389,16 @@ reg = <0x1e6f2000 0x1000>; }; + video: video@1e700000 { + compatible = "aspeed,ast2600-video-engine"; + reg = <0x1e700000 0x1000>; + clocks = <&syscon ASPEED_CLK_GATE_VCLK>, + <&syscon ASPEED_CLK_GATE_ECLK>; + clock-names = "vclk", "eclk"; + interrupts = ; + status = "disabled"; + }; + gpio0: gpio@1e780000 { #gpio-cells = <2>; gpio-controller; From 4d0d5c352303a318925ceb84a86818761aa6586b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Apr 2022 08:47:29 -0500 Subject: [PATCH 019/491] dt-bindings: hwmon: ti,tmp421: Fix type for 'ti,n-factor' 'ti,n-factor' is read as a 32-bit signed value, so the type and constraints are wrong. The same property is also defined for ti,tmp464 and is correct. The constraints should also not be under 'items' as this property is not an array. Cc: Jean Delvare Cc: Guenter Roeck Cc: Krzysztof Kozlowski Cc: linux-hwmon@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220413134729.3112190-1-robh@kernel.org Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml index 36f649938fb7e..a6f1fa75a67cd 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml @@ -58,10 +58,9 @@ patternProperties: description: | The value (two's complement) to be programmed in the channel specific N correction register. For remote channels only. - $ref: /schemas/types.yaml#/definitions/uint32 - items: - minimum: 0 - maximum: 255 + $ref: /schemas/types.yaml#/definitions/int32 + minimum: -128 + maximum: 127 required: - reg From a063f2fba3fa633a599253b62561051ac185fa99 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 Apr 2022 13:51:10 +0200 Subject: [PATCH 020/491] batman-adv: Don't skb_split skbuffs with frag_list The receiving interface might have used GRO to receive more fragments than MAX_SKB_FRAGS fragments. In this case, these will not be stored in skb_shinfo(skb)->frags but merged into the frag list. batman-adv relies on the function skb_split to split packets up into multiple smaller packets which are not larger than the MTU on the outgoing interface. But this function cannot handle frag_list entries and is only operating on skb_shinfo(skb)->frags. If it is still trying to split such an skb and xmit'ing it on an interface without support for NETIF_F_FRAGLIST, then validate_xmit_skb() will try to linearize it. But this fails due to inconsistent information. And __pskb_pull_tail will trigger a BUG_ON after skb_copy_bits() returns an error. In case of entries in frag_list, just linearize the skb before operating on it with skb_split(). Reported-by: Felix Kaechele Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Tested-by: Felix Kaechele Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0899a729a23f4..c120c7c6d25fc 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto free_skb; } + /* GRO might have added fragments to the fragment list instead of + * frags[]. But this is not handled by skb_split and must be + * linearized to avoid incorrect length information after all + * batman-adv fragments were created and submitted to the + * hard-interface + */ + if (skb_has_frag_list(skb) && __skb_linearize(skb)) { + ret = -ENOMEM; + goto free_skb; + } + /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; frag_header.version = BATADV_COMPAT_VERSION; From 4aaaaf0f279836f06d3b9d0ffeec7a1e1a04ceef Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Mon, 18 Apr 2022 17:07:06 +0800 Subject: [PATCH 021/491] hwmon: (f71882fg) Fix negative temperature All temperature of Fintek superio hwmonitor that using 1-byte reg will use 2's complement. In show_temp() temp = data->temp[nr] * 1000; When data->temp[nr] read as 255, it indicate -1C, but this code will report 255C to userspace. It'll be ok when change to: temp = ((s8)data->temp[nr]) * 1000; Signed-off-by: Ji-Ze Hong (Peter Hong) Link: https://lore.kernel.org/r/20220418090706.6339-1-hpeter+linux_kernel@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/f71882fg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 938a8b9ec70dd..6830e029995dc 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -1578,8 +1578,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, temp *= 125; if (sign) temp -= 128000; - } else - temp = data->temp[nr] * 1000; + } else { + temp = ((s8)data->temp[nr]) * 1000; + } return sprintf(buf, "%d\n", temp); } From b4f5c6b2e52b27462c0599e64e96e53b58438de1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 16 Apr 2022 13:54:08 +0100 Subject: [PATCH 022/491] ASoC: wm8958: Fix change notifications for DSP controls The WM8958 DSP controls all return 0 on successful write, not a boolean value indicating if the write changed the value of the control. Fix this by returning 1 after a change, there is already a check at the start of each put() that skips the function in the case that there is no change. Signed-off-by: Mark Brown Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20220416125408.197440-1-broonie@kernel.org Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8958-dsp2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index e4018ba3b19a2..7878c7a58ff10 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -530,7 +530,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]); - return 0; + return 1; } #define WM8958_MBC_SWITCH(xname, xval) {\ @@ -656,7 +656,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]); - return 0; + return 1; } @@ -730,7 +730,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_HPF_SWITCH(xname, xval) {\ @@ -824,7 +824,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_ENH_EQ_SWITCH(xname, xval) {\ From 679ab61bf5f5f519377d812afb4fb93634782c74 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 18 Apr 2022 23:33:22 +0800 Subject: [PATCH 023/491] RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core() There is a deadlock in irdma_cleanup_cm_core(), which is shown below: (Thread 1) | (Thread 2) | irdma_schedule_cm_timer() irdma_cleanup_cm_core() | add_timer() spin_lock_irqsave() //(1) | (wait a time) ... | irdma_cm_timer_tick() del_timer_sync() | spin_lock_irqsave() //(2) (wait timer to stop) | ... We hold cm_core->ht_lock in position (1) of thread 1 and use del_timer_sync() to wait timer to stop, but timer handler also need cm_core->ht_lock in position (2) of thread 2. As a result, irdma_cleanup_cm_core() will block forever. This patch removes the check of timer_pending() in irdma_cleanup_cm_core(), because the del_timer_sync() function will just return directly if there isn't a pending timer. As a result, the lock is redundant, because there is no resource it could protect. Link: https://lore.kernel.org/r/20220418153322.42524-1-duoming@zju.edu.cn Signed-off-by: Duoming Zhou Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index dedb3b7edd8d6..a98d962e5efb6 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3246,15 +3246,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { - unsigned long flags; - if (!cm_core) return; - spin_lock_irqsave(&cm_core->ht_lock, flags); - if (timer_pending(&cm_core->tcp_timer)) - del_timer_sync(&cm_core->tcp_timer); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); + del_timer_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); From 95d4782c34a60800ccf91d9f0703137d4367a2fc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 19 Apr 2022 14:01:58 -0700 Subject: [PATCH 024/491] iommu/arm-smmu-v3: Fix size calculation in arm_smmu_mm_invalidate_range() The arm_smmu_mm_invalidate_range function is designed to be called by mm core for Shared Virtual Addressing purpose between IOMMU and CPU MMU. However, the ways of two subsystems defining their "end" addresses are slightly different. IOMMU defines its "end" address using the last address of an address range, while mm core defines that using the following address of an address range: include/linux/mm_types.h: unsigned long vm_end; /* The first byte after our end address ... This mismatch resulted in an incorrect calculation for size so it failed to be page-size aligned. Further, it caused a dead loop at "while (iova < end)" check in __arm_smmu_tlb_inv_range function. This patch fixes the issue by doing the calculation correctly. Fixes: 2f7e8c553e98 ("iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220419210158.21320-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 22ddd05bbdcd0..c623dae1e1154 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -183,7 +183,14 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - size_t size = end - start + 1; + size_t size; + + /* + * The mm_types defines vm_end as the first byte after the end address, + * different from IOMMU subsystem using the last address of an address + * range. So do a simple translation here by calculating size correctly. + */ + size = end - start; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, From 3756aa16fadaef2873cfbd2659dfa1978a7e1859 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 12 Apr 2022 13:16:58 +0200 Subject: [PATCH 025/491] ASoC: simple-card-utils: fix sysclk shutdown In asoc_simple_shutdown() the snd_soc_dai_set_sysclk() function is called twice with input direction SND_SOC_CLOCK_IN. Restore one call with output direction SND_SOC_CLOCK_OUT. Fixes: 5ca2ab459817 ("ASoC: simple-card-utils: Add new system-clock-fixed flag") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20220412111658.11015-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index f2157944247f7..da0c27828ce60 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -322,7 +322,7 @@ void asoc_simple_shutdown(struct snd_pcm_substream *substream) if (props->mclk_fs && !dai->clk_fixed && !snd_soc_dai_active(cpu_dai)) snd_soc_dai_set_sysclk(cpu_dai, - 0, 0, SND_SOC_CLOCK_IN); + 0, 0, SND_SOC_CLOCK_OUT); asoc_simple_clk_disable(dai); } From 570a4bf7440e9fb2a4164244a6bf60a46362b627 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 18 Apr 2022 12:41:04 -0500 Subject: [PATCH 026/491] RDMA/rxe: Recheck the MR in when generating a READ reply The rping benchmark fails on long runs. The root cause of this failure has been traced to a failure to compute a nonzero value of mr in rare situations. Fix this failure by correctly handling the computation of mr in read_reply() in rxe_resp.c in the replay flow. Fixes: 8a1a0be894da ("RDMA/rxe: Replace mr by rkey in responder resources") Link: https://lore.kernel.org/r/20220418174103.3040-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1d95fab606da3..9cd0eaff98dea 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -745,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, } if (res->state == rdatm_res_state_new) { - mr = qp->resp.mr; - qp->resp.mr = NULL; + if (!res->replay) { + mr = qp->resp.mr; + qp->resp.mr = NULL; + } else { + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + } if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; From 08ef48404965cfef99343d6bbbcf75b88c74aa0e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 14:34:37 +0100 Subject: [PATCH 027/491] ASoC: da7219: Fix change notifications for tone generator frequency The tone generator frequency control just returns 0 on successful write, not a boolean value indicating if there was a change or not. Compare what was written with the value that was there previously so that notifications are generated appropriately when the value changes. Signed-off-by: Mark Brown Reviewed-by: Adam Thomson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220420133437.569229-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/da7219.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index 13009d08b09ac..c7493549a9a50 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, struct soc_mixer_control *mixer_ctrl = (struct soc_mixer_control *) kcontrol->private_value; unsigned int reg = mixer_ctrl->reg; - __le16 val; + __le16 val_new, val_old; int ret; /* @@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, * Therefore we need to convert to little endian here to align with * HW registers. */ - val = cpu_to_le16(ucontrol->value.integer.value[0]); + val_new = cpu_to_le16(ucontrol->value.integer.value[0]); mutex_lock(&da7219->ctrl_lock); - ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val)); + ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old)); + if (ret == 0 && (val_old != val_new)) + ret = regmap_raw_write(da7219->regmap, reg, + &val_new, sizeof(val_new)); mutex_unlock(&da7219->ctrl_lock); - return ret; + if (ret < 0) + return ret; + + return val_old != val_new; } From 2e3a0d1bfa95b54333f7add3e50e288769373873 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:01 +0100 Subject: [PATCH 028/491] ASoC: meson: Fix event generation for AUI ACODEC mux The AIU ACODEC has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-2-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/aiu-acodec-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/aiu-acodec-ctrl.c b/sound/soc/meson/aiu-acodec-ctrl.c index 22e181646bc39..3776b073a3dbb 100644 --- a/sound/soc/meson/aiu-acodec-ctrl.c +++ b/sound/soc/meson/aiu-acodec-ctrl.c @@ -58,7 +58,7 @@ static int aiu_acodec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(aiu_acodec_ctrl_mux_enum, AIU_ACODEC_CTRL, From fce49921a22262736cdc3cc74fa67915b75e9363 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:02 +0100 Subject: [PATCH 029/491] ASoC: meson: Fix event generation for AUI CODEC mux The AIU CODEC has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-3-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/aiu-codec-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/aiu-codec-ctrl.c b/sound/soc/meson/aiu-codec-ctrl.c index 59ee66fc2bcd7..286ac4983d40c 100644 --- a/sound/soc/meson/aiu-codec-ctrl.c +++ b/sound/soc/meson/aiu-codec-ctrl.c @@ -57,7 +57,7 @@ static int aiu_codec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(aiu_hdmi_ctrl_mux_enum, AIU_HDMI_CLK_DATA_CTRL, From 12131008fc13ff7f7690d170b7a8f72d24fd7d1e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:03 +0100 Subject: [PATCH 030/491] ASoC: meson: Fix event generation for G12A tohdmi mux The G12A tohdmi has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-4-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/g12a-tohdmitx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c index 9b2b59536ced0..6c99052feafd8 100644 --- a/sound/soc/meson/g12a-tohdmitx.c +++ b/sound/soc/meson/g12a-tohdmitx.c @@ -67,7 +67,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_i2s_mux_enum, TOHDMITX_CTRL0, From eb5773201b1c5d603424bd21f161c8c2d1075b42 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 21 Apr 2022 11:23:28 -0500 Subject: [PATCH 031/491] ASoC: soc-ops: fix error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cppcheck throws the following warning: sound/soc/soc-ops.c:461:8: style: Variable 'ret' is assigned a value that is never used. [unreadVariable] ret = err; ^ This seems to be a missing change in the return value. Fixes: 7f3d90a351968 ("ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw_sx()") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220421162328.302017-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index a0ca58ba16273..58347eadd219b 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -461,7 +461,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; } } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx); From c26830b6c5c534d273ce007eb33d5a2d2ad4e969 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 21 Apr 2022 17:57:24 +0200 Subject: [PATCH 032/491] ASoC: meson: axg-tdm-interface: Fix formatters in trigger" This reverts commit bf5e4887eeddb48480568466536aa08ec7f179a5 because the following and required commit e138233e56e9829e65b6293887063a1a3ccb2d68 causes the following system crash when using audio: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:282 Fixes: bf5e4887eeddb4848056846 ("ASoC: meson: axg-tdm-interface: manage formatters in trigger") Reported-by: Dmitry Shmidt Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421155725.2589089-1-narmstrong@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-interface.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 0c31934a96301..e076ced300257 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -351,29 +351,13 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, - int cmd, +static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = - snd_soc_dai_get_dma_data(dai, substream); - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - axg_tdm_stream_start(ts); - break; - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - case SNDRV_PCM_TRIGGER_STOP: - axg_tdm_stream_stop(ts); - break; - default: - return -EINVAL; - } + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - return 0; + /* Force all attached formatters to update */ + return axg_tdm_stream_reset(ts); } static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) @@ -413,8 +397,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, + .prepare = axg_tdm_iface_prepare, .hw_free = axg_tdm_iface_hw_free, - .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ From 0c9b152c72e53016e96593bdbb8cffe2176694b9 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 21 Apr 2022 17:57:25 +0200 Subject: [PATCH 033/491] ASoC: meson: axg-card: Fix nonatomic links This commit e138233e56e9829e65b6293887063a1a3ccb2d68 causes the following system crash when using audio on G12A/G12B & SM1 systems: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:282 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/0 preempt_count: 10001, expected: 0 RCU nest depth: 0, expected: 0 Preemption disabled at: schedule_preempt_disabled+0x20/0x2c mutex_lock+0x24/0x60 _snd_pcm_stream_lock_irqsave+0x20/0x3c snd_pcm_period_elapsed+0x24/0xa4 axg_fifo_pcm_irq_block+0x64/0xdc __handle_irq_event_percpu+0x104/0x264 handle_irq_event+0x48/0xb4 ... start_kernel+0x3f0/0x484 __primary_switched+0xc0/0xc8 Revert this commit until the crash is fixed. Fixes: e138233e56e9829e65b6 ("ASoC: meson: axg-card: make links nonatomic") Reported-by: Dmitry Shmidt Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421155725.2589089-2-narmstrong@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index cbbaa55d92a66..2b77010c2c5ce 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -320,7 +320,6 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; - dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node, &dai_link->cpus->dai_name); From 60cc5468daaefc18ffc081dc484bdaa1bd270561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 21 Apr 2022 14:32:54 -0300 Subject: [PATCH 034/491] =?UTF-8?q?futex:=20MAINTAINERS,=20.mailmap:=20Upd?= =?UTF-8?q?ate=20Andr=C3=A9's=20email=20address?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update futex entry to use my new professional email address. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220421173254.29855-1-andrealmeid@igalia.com --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 93458154ce7d4..ea1ba4a9a77e0 100644 --- a/.mailmap +++ b/.mailmap @@ -45,6 +45,7 @@ Andrey Konovalov Andrey Ryabinin Andrey Ryabinin Andrzej Hajda +André Almeida Andy Adamson Antoine Tenart Antoine Tenart diff --git a/MAINTAINERS b/MAINTAINERS index 40fa1955ca3f5..35dea3d129811 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8109,7 +8109,7 @@ M: Ingo Molnar R: Peter Zijlstra R: Darren Hart R: Davidlohr Bueso -R: André Almeida +R: André Almeida L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core From 214cab6f8020a9ad4a5e9862a4e68088d5a79f08 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 18 Apr 2022 21:20:16 +0000 Subject: [PATCH 035/491] MAINTAINERS: Update email address for John Stultz I've switched jobs, so update my email address in MAINTAINERS Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Acked-by: Sumit Semwal Link: https://lore.kernel.org/r/20220418212016.2669086-1-jstultz@google.com --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 40fa1955ca3f5..c2e1b72024491 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5914,7 +5914,7 @@ R: Benjamin Gaignard R: Liam Mark R: Laura Abbott R: Brian Starkey -R: John Stultz +R: John Stultz L: linux-media@vger.kernel.org L: dri-devel@lists.freedesktop.org L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) @@ -6584,7 +6584,7 @@ F: drivers/gpu/drm/gma500/ DRM DRIVERS FOR HISILICON M: Xinliang Liu M: Tian Tao -R: John Stultz +R: John Stultz R: Xinwei Kong R: Chen Feng L: dri-devel@lists.freedesktop.org @@ -8845,7 +8845,7 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt F: drivers/net/ethernet/hisilicon/ HIKEY960 ONBOARD USB GPIO HUB DRIVER -M: John Stultz +M: John Stultz L: linux-kernel@vger.kernel.org S: Maintained F: drivers/misc/hisi_hikey_usb.c @@ -19784,7 +19784,7 @@ F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER -M: John Stultz +M: John Stultz M: Thomas Gleixner R: Stephen Boyd L: linux-kernel@vger.kernel.org From a6ac60b36dade525c13c5bb0838589619533efb7 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Apr 2022 15:39:37 +0800 Subject: [PATCH 036/491] ALSA: hda/realtek: Fix mute led issue on thinkpad with cs35l41 s-codec The quirk ALC287_FIXUP_CS35L41_I2C_2 needs to chain the quirk ALC269_FIXUP_THINKPAD_ACPI, otherwise the mute led will not work if a thinkpad machine applies that quirk. And it will be safe if non-thinkpad machines apply that quirk since hda_fixup_thinkpad_acpi() will check and return in this case. Fixes: ae7abe36e352e ("ALSA: hda/realtek: Add CS35L41 support for Thinkpad laptops") Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20220422073937.10073-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4c0c593f3c0a9..f9c3b2c9ca123 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8769,6 +8769,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_CS35L41_I2C_2] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, }, [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, From 5f5d8890789c90470d9571a283f0b789acd594af Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Fri, 22 Apr 2022 17:08:43 +0800 Subject: [PATCH 037/491] ALSA: hda/realtek: Enable mute/micmute LEDs support for HP Laptops On HP Laptops, requires the same ALC285_FIXUP_HP_GPIO_LED quirk to make its audio LEDs work. So apply the quirk, and make it the last one since it's an LED quirk. Signed-off-by: Andy Chi Fixes: 07bcab93946c ("ALSA: hda/realtek: Add support for HP Laptops") Link: https://lore.kernel.org/r/20220422090845.230071-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f9c3b2c9ca123..c65d3dbc6cc96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9025,12 +9025,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), From 4a25f2ea0e030b2fc852c4059a50181bfc5b2f57 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Thu, 21 Apr 2022 13:45:04 +0530 Subject: [PATCH 038/491] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu Tegra194 and Tegra234 SoCs have the erratum that causes walk cache entries to not be invalidated correctly. The problem is that the walk cache index generated for IOVA is not same across translation and invalidation requests. This is leading to page faults when PMD entry is released during unmap and populated with new PTE table during subsequent map request. Disabling large page mappings avoids the release of PMD entry and avoid translations seeing stale PMD entry in walk cache. Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and Tegra234 devices. This is recommended fix from Tegra hardware design team. Acked-by: Robin Murphy Reviewed-by: Krishna Reddy Co-developed-by: Pritesh Raithatha Signed-off-by: Pritesh Raithatha Signed-off-by: Ashish Mhetre Link: https://lore.kernel.org/r/20220421081504.24678-1-amhetre@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 01e9b50b10a18..87bf522b9d2ee 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi dev_name(dev), err); } +static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, + struct io_pgtable_cfg *pgtbl_cfg, + struct device *dev) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + const struct device_node *np = smmu->dev->of_node; + + /* + * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache + * entries to not be invalidated correctly. The problem is that the walk + * cache index generated for IOVA is not same across translation and + * invalidation requests. This is leading to page faults when PMD entry + * is released during unmap and populated with new PTE table during + * subsequent map request. Disabling large page mappings avoids the + * release of PMD entry and avoid translations seeing stale PMD entry in + * walk cache. + * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and + * Tegra234. + */ + if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || + of_device_is_compatible(np, "nvidia,tegra194-smmu")) { + smmu->pgsize_bitmap = PAGE_SIZE; + pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; + } + + return 0; +} + static const struct arm_smmu_impl nvidia_smmu_impl = { .read_reg = nvidia_smmu_read_reg, .write_reg = nvidia_smmu_write_reg, @@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { .global_fault = nvidia_smmu_global_fault, .context_fault = nvidia_smmu_context_fault, .probe_finalize = nvidia_smmu_probe_finalize, + .init_context = nvidia_smmu_init_context, }; static const struct arm_smmu_impl nvidia_smmu_single_impl = { .probe_finalize = nvidia_smmu_probe_finalize, + .init_context = nvidia_smmu_init_context, }; struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) From 87c18514bb8477563a61f50b4285da156296edc4 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Fri, 22 Apr 2022 14:26:50 +0800 Subject: [PATCH 039/491] ASoC: rt9120: Correct the reg 0x09 size to one byte Correct the reg 0x09 size to one byte. Signed-off-by: ChiYuan Huang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1650608810-3829-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt9120.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c index 7aa1772a915f3..6e0d7cf0c8c92 100644 --- a/sound/soc/codecs/rt9120.c +++ b/sound/soc/codecs/rt9120.c @@ -341,7 +341,6 @@ static int rt9120_get_reg_size(unsigned int reg) { switch (reg) { case 0x00: - case 0x09: case 0x20 ... 0x27: return 2; case 0x30 ... 0x3D: From e13433b4416fa31a24e621cbbbb39227a3d651dd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 21 Apr 2022 10:32:34 -0400 Subject: [PATCH 040/491] SUNRPC release the transport of a relocated task with an assigned transport A relocated task must release its previous transport. Fixes: 82ee41b85cef1 ("SUNRPC don't resend a task on an offlined transport") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af0174d7ce5a8..98133aa54f198 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1065,10 +1065,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt && - !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && - (task->tk_flags & RPC_TASK_MOVEABLE))) - return; + if (task->tk_xprt) { + if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) + return; + xprt_release(task); + xprt_put(task->tk_xprt); + } if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else From 7635a1ad8d92dcc8247b53f949e37795154b5b6f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 11 Apr 2022 08:42:10 -0700 Subject: [PATCH 041/491] iwlwifi: iwl-dbg: Use del_timer_sync() before freeing In Chrome OS, a large number of crashes is observed due to corrupted timer lists. Steven Rostedt pointed out that this usually happens when a timer is freed while still active, and that the problem is often triggered by code calling del_timer() instead of del_timer_sync() just before freeing. Steven also identified the iwlwifi driver as one of the possible culprits since it does exactly that. Reported-by: Steven Rostedt Cc: Steven Rostedt Cc: Johannes Berg Cc: Gregory Greenman Fixes: 60e8abd9d3e91 ("iwlwifi: dbg_ini: add periodic trigger new API support") Signed-off-by: Guenter Roeck Acked-by: Gregory Greenman Tested-by: Sedat Dilek # Linux v5.17.3-rc1 and Debian LLVM-14 Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220411154210.1870008-1-linux@roeck-us.net --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 866a33f49915f..3237d4b528b5d 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -371,7 +371,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans) struct iwl_dbg_tlv_timer_node *node, *tmp; list_for_each_entry_safe(node, tmp, timer_list, list) { - del_timer(&node->timer); + del_timer_sync(&node->timer); list_del(&node->list); kfree(node); } From 4dd4e6f659850f2df20b9612593f5a0f040549e1 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 12 Apr 2022 22:01:41 +0300 Subject: [PATCH 042/491] MAINTAINERS: update iwlwifi driver maintainer Set myself as a maintainer of iwlwifi driver as Luca is moving to a new role. Signed-off-by: Gregory Greenman Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220412190141.4543-1-gregory.greenman@intel.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c8fe97a8d60e9..6a7a839acfe33 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10129,7 +10129,7 @@ S: Supported F: drivers/net/wireless/intel/iwlegacy/ INTEL WIRELESS WIFI LINK (iwlwifi) -M: Luca Coelho +M: Gregory Greenman L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi From 70509676739f88317f3a171d9f9b5fce9d12e617 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Sun, 24 Apr 2022 21:08:05 -0700 Subject: [PATCH 043/491] dt-bindings: input: mediatek,mt6779-keypad: update maintainer Fengping has no longer interest and time to maintain this driver so he agreed to transfer maintainership over to me. Signed-off-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20220421140255.2781505-1-mkorpershoek@baylibre.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/mediatek,mt6779-keypad.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml index b1770640f94bb..03ebd2665d078 100644 --- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml +++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Mediatek's Keypad Controller device tree bindings maintainers: - - Fengping Yu + - Mattijs Korpershoek allOf: - $ref: "/schemas/input/matrix-keymap.yaml#" From b7c81f80246fac44077166f3e07103affe6db8ff Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Sat, 9 Apr 2022 13:12:41 +0900 Subject: [PATCH 044/491] firewire: fix potential uaf in outbound_phy_packet_callback() &e->event and e point to the same address, and &e->event could be freed in queue_event. So there is a potential uaf issue if we dereference e after calling queue_event(). Fix this by adding a temporary variable to maintain e->client in advance, this can avoid the potential uaf issue. Cc: Signed-off-by: Chengfeng Ye Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-cdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 9f89c17730b12..708e417200f46 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1500,6 +1500,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); + struct client *e_client; switch (status) { /* expected: */ @@ -1516,9 +1517,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; + e_client = e->client; queue_event(e->client, &e->event, &e->phy_packet, sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0); - client_put(e->client); + client_put(e_client); } static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) From 9423973869bd4632ffe669f950510c49296656e0 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Sat, 9 Apr 2022 13:12:42 +0900 Subject: [PATCH 045/491] firewire: remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Cc: Signed-off-by: Jakob Koschel Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-transaction.c | 30 +++++++++++++++-------------- drivers/firewire/sbp2.c | 13 +++++++------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index ac487c96bb717..6c20815cc8d16 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t) static int close_transaction(struct fw_transaction *transaction, struct fw_card *card, int rcode) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t == transaction) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter == transaction) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link != &card->transaction_list) { + if (t) { t->callback(card, rcode, NULL, 0, t->callback_data); return 0; } @@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; u32 *data; size_t data_length; @@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) rcode = HEADER_GET_RCODE(p->header[1]); spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t->node_id == source && t->tlabel == tlabel) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter->node_id == source && iter->tlabel == tlabel) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link == &card->transaction_list) { + if (!t) { timed_out: fw_notice(card, "unsolicited response (source %x, tlabel %x)\n", source, tlabel); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 85cd379fd3838..60051c0cabeaa 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, void *payload, size_t length, void *callback_data) { struct sbp2_logical_unit *lu = callback_data; - struct sbp2_orb *orb; + struct sbp2_orb *orb = NULL, *iter; struct sbp2_status status; unsigned long flags; @@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, /* Lookup the orb corresponding to this status write. */ spin_lock_irqsave(&lu->tgt->lock, flags); - list_for_each_entry(orb, &lu->orb_list, link) { + list_for_each_entry(iter, &lu->orb_list, link) { if (STATUS_GET_ORB_HIGH(status) == 0 && - STATUS_GET_ORB_LOW(status) == orb->request_bus) { - orb->rcode = RCODE_COMPLETE; - list_del(&orb->link); + STATUS_GET_ORB_LOW(status) == iter->request_bus) { + iter->rcode = RCODE_COMPLETE; + list_del(&iter->link); + orb = iter; break; } } spin_unlock_irqrestore(&lu->tgt->lock, flags); - if (&orb->link != &lu->orb_list) { + if (orb) { orb->callback(orb, &status); kref_put(&orb->kref, free_orb); /* orb callback reference */ } else { From a7ecbe92b9243edbe94772f6f2c854e4142a3345 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Sat, 9 Apr 2022 13:12:43 +0900 Subject: [PATCH 046/491] firewire: core: extend card->lock in fw_core_handle_bus_reset card->local_node and card->bm_retries are both always accessed under card->lock. fw_core_handle_bus_reset has a check whose condition depends on card->local_node and whose body writes to card->bm_retries. Both of these accesses are not under card->lock. Move the lock acquiring of card->lock to before this check such that these accesses do happen when card->lock is held. fw_destroy_nodes is called inside the check. Since fw_destroy_nodes already acquires card->lock inside its function body, move this out to the callsites of fw_destroy_nodes. Also add a comment to indicate which locking is necessary when calling fw_destroy_nodes. Cc: Signed-off-by: Niels Dossche Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-card.c | 3 +++ drivers/firewire/core-topology.c | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 54be88167c60b..f3b3953cac834 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release); void fw_core_remove_card(struct fw_card *card) { struct fw_card_driver dummy_driver = dummy_driver_template; + unsigned long flags; card->driver->update_phy_reg(card, 4, PHY_LINK_ACTIVE | PHY_CONTENDER, 0); @@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card) dummy_driver.stop_iso = card->driver->stop_iso; card->driver = &dummy_driver; + spin_lock_irqsave(&card->lock, flags); fw_destroy_nodes(card); + spin_unlock_irqrestore(&card->lock, flags); /* Wait for all users, especially device workqueue jobs, to finish. */ fw_card_put(card); diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index b63d55f5ebd33..f40c815343812 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -375,16 +375,13 @@ static void report_found_node(struct fw_card *card, card->bm_retries = 0; } +/* Must be called with card->lock held */ void fw_destroy_nodes(struct fw_card *card) { - unsigned long flags; - - spin_lock_irqsave(&card->lock, flags); card->color++; if (card->local_node != NULL) for_each_fw_node(card, card->local_node, report_lost_node); card->local_node = NULL; - spin_unlock_irqrestore(&card->lock, flags); } static void move_tree(struct fw_node *node0, struct fw_node *node1, int port) @@ -510,6 +507,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, struct fw_node *local_node; unsigned long flags; + spin_lock_irqsave(&card->lock, flags); + /* * If the selfID buffer is not the immediate successor of the * previously processed one, we cannot reliably compare the @@ -521,8 +520,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, card->bm_retries = 0; } - spin_lock_irqsave(&card->lock, flags); - card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated; card->node_id = node_id; /* From 3b79954fd00d540677c97a560622b73f3a1f4e28 Mon Sep 17 00:00:00 2001 From: Zihao Wang Date: Sun, 24 Apr 2022 16:41:20 +0800 Subject: [PATCH 047/491] ALSA: hda/realtek: Add quirk for Yoga Duet 7 13ITL6 speakers Lenovo Yoga Duet 7 13ITL6 has Realtek ALC287 and built-in speakers do not work out of the box. The fix developed for Yoga 7i 14ITL5 also enables speaker output for this model. Signed-off-by: Zihao Wang Cc: Link: https://lore.kernel.org/r/20220424084120.74125-1-wzhd@ustc.edu Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c65d3dbc6cc96..cf531c1efa132 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9247,6 +9247,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), From eb9d84b0ffe39893cb23b0b6712bbe3637fa25fa Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 24 Apr 2022 19:24:28 +0900 Subject: [PATCH 048/491] ALSA: fireworks: fix wrong return count shorter than expected by 4 bytes ALSA fireworks driver has a bug in its initial state to return count shorter than expected by 4 bytes to userspace applications when handling response frame for Echo Audio Fireworks transaction. It's due to missing addition of the size for the type of event in ALSA firewire stack. Fixes: 555e8a8f7f14 ("ALSA: fireworks: Add command/response functionality into hwdep interface") Cc: Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220424102428.21109-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks_hwdep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c index 626c0c34b0b66..3a53914277d35 100644 --- a/sound/firewire/fireworks/fireworks_hwdep.c +++ b/sound/firewire/fireworks/fireworks_hwdep.c @@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE; if (copy_to_user(buf, &type, sizeof(type))) return -EFAULT; + count += sizeof(type); remained -= sizeof(type); buf += sizeof(type); From 2fbe467bcbfc760a08f08475eea6bbd4c2874319 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 20:34:53 +0100 Subject: [PATCH 049/491] ASoC: max98090: Reject invalid values in custom control put() The max98090 driver has a custom put function for some controls which can only be updated in certain circumstances which makes no effort to validate that input is suitable for the control, allowing out of spec values to be written to the hardware and presented to userspace. Fix this by returning an error when invalid values are written. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220420193454.2647908-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index b45ec35cd63c3..6d9261346842f 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, val = (val >> mc->shift) & mask; + if (sel < 0 || sel > mc->max) + return -EINVAL; + *select = sel; /* Setting a volume is only valid if it is already On */ From 13fcf676d9e102594effc686d98521ff5c90b925 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 20:34:54 +0100 Subject: [PATCH 050/491] ASoC: max98090: Generate notifications on changes for custom control The max98090 driver has some custom controls which share a put() function which returns 0 unconditionally, meaning that events are not generated when the value changes. Fix that. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220420193454.2647908-2-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 6d9261346842f..62b41ca050a20 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -430,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, mask << mc->shift, sel << mc->shift); - return 0; + return *select != val; } static const char *max98090_perf_pwr_text[] = From 2bde1985e39173d8cb64005dad6f34e9bee4c750 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Thu, 21 Apr 2022 15:54:03 +0300 Subject: [PATCH 051/491] ASoC: atmel: mchp-pdmc: set prepare_slave_config Since a pointer to struct snd_dmaengine_pcm_config is passed, snd_dmaengine_pcm_prepare_slave_config() is no longer called unless it's explicitly set in prepare_slave_config. Fixes: 50291652af52 ("ASoC: atmel: mchp-pdmc: add PDMC driver") Suggested-by: Sascha Hauer Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220421125403.2180824-2-codrin.ciubotariu@microchip.com Signed-off-by: Mark Brown --- sound/soc/atmel/mchp-pdmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/atmel/mchp-pdmc.c b/sound/soc/atmel/mchp-pdmc.c index 1a7802fbf23c1..a3856c73e2214 100644 --- a/sound/soc/atmel/mchp-pdmc.c +++ b/sound/soc/atmel/mchp-pdmc.c @@ -966,6 +966,7 @@ static int mchp_pdmc_process(struct snd_pcm_substream *substream, static struct snd_dmaengine_pcm_config mchp_pdmc_config = { .process = mchp_pdmc_process, + .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, }; static int mchp_pdmc_probe(struct platform_device *pdev) From 660564fc9a92a893a14f255be434f7ea0b967901 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Thu, 21 Apr 2022 15:54:02 +0300 Subject: [PATCH 052/491] ASoC: dmaengine: Restore NULL prepare_slave_config() callback As pointed out by Sascha Hauer, this patch changes: if (pmc->config && !pcm->config->prepare_slave_config) to: if (pmc->config && !pcm->config->prepare_slave_config) snd_dmaengine_pcm_prepare_slave_config() This breaks the drivers that do not need a call to dmaengine_slave_config(). Drivers that still need to call snd_dmaengine_pcm_prepare_slave_config(), but have a NULL pcm->config->prepare_slave_config should use snd_dmaengine_pcm_prepare_slave_config() as their prepare_slave_config callback. Fixes: 9a1e13440a4f ("ASoC: dmaengine: do not use a NULL prepare_slave_config() callback") Reported-by: Sascha Hauer Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220421125403.2180824-1-codrin.ciubotariu@microchip.com Signed-off-by: Mark Brown --- sound/soc/soc-generic-dmaengine-pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index 2ab2ddc1294dd..285441d6aeed4 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component, memset(&slave_config, 0, sizeof(slave_config)); - if (pcm->config && pcm->config->prepare_slave_config) - prepare_slave_config = pcm->config->prepare_slave_config; - else + if (!pcm->config) prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; + else + prepare_slave_config = pcm->config->prepare_slave_config; if (prepare_slave_config) { int ret = prepare_slave_config(substream, params, &slave_config); From 75d2b2b06bd8407d03a3f126bc8b95eb356906c7 Mon Sep 17 00:00:00 2001 From: Adam Wujek Date: Wed, 20 Apr 2022 14:51:25 +0000 Subject: [PATCH 053/491] hwmon: (pmbus) disable PEC if not enabled Explicitly disable PEC when the client does not support it. The problematic scenario is the following. A device with enabled PEC support is up and running and a kernel driver is loaded. Then the driver is unloaded (or device unbound), the HW device is reconfigured externally (e.g. by i2cset) to advertise itself as not supporting PEC. Without a new code, at the second load of the driver (or bind) the "flags" variable is not updated to avoid PEC usage. As a consequence the further communication with the device is done with the PEC enabled, which is wrong and may fail. The implementation first disable the I2C_CLIENT_PEC flag, then the old code enable it if needed. Fixes: 4e5418f787ec ("hwmon: (pmbus_core) Check adapter PEC support") Signed-off-by: Adam Wujek Link: https://lore.kernel.org/r/20220420145059.431061-1-dev_public@wujek.eu Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index b2618b1d529e2..d93574d6a1fb6 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2326,6 +2326,9 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, data->has_status_word = true; } + /* Make sure PEC is disabled, will be enabled later if needed */ + client->flags &= ~I2C_CLIENT_PEC; + /* Enable PEC if the controller and bus supports it */ if (!(data->flags & PMBUS_NO_CAPABILITY)) { ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY); From 00c94ebec5925593c0377b941289224469e72ac7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Apr 2022 18:04:27 -0400 Subject: [PATCH 054/491] NFSv4: Don't invalidate inode attributes on delegation return There is no need to declare attributes such as the ctime, mtime and block size invalid when we're just returning a delegation, so it is inappropriate to call nfs_post_op_update_inode_force_wcc(). Instead, just call nfs_refresh_inode() after faking up the change attribute. We know that the GETATTR op occurs before the DELEGRETURN, so we are safe when doing this. Fixes: 0bc2c9b4dca9 ("NFSv4: Don't discard the attributes returned by asynchronous DELEGRETURN") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 16106f805ffa2..a79f66432bd39 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -363,6 +363,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } +static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { + fattr->pre_change_attr = version; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } +} + static void nfs4_test_and_free_stateid(struct nfs_server *server, nfs4_stateid *stateid, const struct cred *cred) @@ -6553,7 +6561,9 @@ static void nfs4_delegreturn_release(void *calldata) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); if (inode) { - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + nfs4_fattr_set_prechange(&data->fattr, + inode_peek_iversion_raw(inode)); + nfs_refresh_inode(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); From 73f1aaf114e63ee2baa94ac798d5764b15e4fdc5 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Wed, 13 Apr 2022 10:13:03 +0530 Subject: [PATCH 055/491] MAINTAINERS: omap: remove me as a maintainer The codeaurora.org domain is no longer valid, remove my id from the maintainers for OMAP PM frameworks. I haven't contributed to them in years, neither do I plan to in the future so not updating this with my new quicinc id. Signed-off-by: Rajendra Nayak Message-Id: <1649824983-29400-1-git-send-email-quic_rjendra@quicinc.com> Signed-off-by: Tony Lindgren --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e0482..fc2de38b5087c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14363,7 +14363,6 @@ F: arch/arm/*omap*/*pm* F: drivers/cpufreq/omap-cpufreq.c OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT -M: Rajendra Nayak M: Paul Walmsley L: linux-omap@vger.kernel.org S: Maintained From 4bc31edebde51fcf8ad0794763b8679a7ecb5ec0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 22 Apr 2022 10:08:53 -0700 Subject: [PATCH 056/491] mmc: core: Set HS clock speed before sending HS CMD13 Way back in commit 4f25580fb84d ("mmc: core: changes frequency to hs_max_dtr when selecting hs400es"), Rockchip engineers noticed that some eMMC don't respond to SEND_STATUS commands very reliably if they're still running at a low initial frequency. As mentioned in that commit, JESD84-B51 P49 suggests a sequence in which the host: 1. sets HS_TIMING 2. bumps the clock ("<= 52 MHz") 3. sends further commands It doesn't exactly require that we don't use a lower-than-52MHz frequency, but in practice, these eMMC don't like it. The aforementioned commit tried to get that right for HS400ES, although it's unclear whether this ever truly worked as committed into mainline, as other changes/refactoring adjusted the sequence in conflicting ways: 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") 53e60650f74e ("mmc: core: Allow CMD13 polling when switching to HS mode for mmc") In any case, today we do step 3 before step 2. Let's fix that, and also apply the same logic to HS200/400, where this eMMC has problems too. Resolves errors like this seen when booting some RK3399 Gru/Scarlet systems: [ 2.058881] mmc1: CQHCI version 5.10 [ 2.097545] mmc1: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA [ 2.209804] mmc1: mmc_select_hs400es failed, error -84 [ 2.215597] mmc1: error -84 whilst initialising MMC card [ 2.417514] mmc1: mmc_select_hs400es failed, error -110 [ 2.423373] mmc1: error -110 whilst initialising MMC card [ 2.605052] mmc1: mmc_select_hs400es failed, error -110 [ 2.617944] mmc1: error -110 whilst initialising MMC card [ 2.835884] mmc1: mmc_select_hs400es failed, error -110 [ 2.841751] mmc1: error -110 whilst initialising MMC card Ealier versions of this patch bumped to 200MHz/HS200 speeds too early, which caused issues on, e.g., qcom-msm8974-fairphone-fp2. (Thanks for the report Luca!) After a second look, it appears that aligns with JESD84 / page 45 / table 28, so we need to keep to lower (HS / 52 MHz) rates first. Fixes: 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") Fixes: 53e60650f74e ("mmc: core: Allow CMD13 polling when switching to HS mode for mmc") Fixes: 4f25580fb84d ("mmc: core: changes frequency to hs_max_dtr when selecting hs400es") Cc: Shawn Lin Link: https://lore.kernel.org/linux-mmc/11962455.O9o76ZdvQC@g550jk/ Reported-by: Luca Weiss Signed-off-by: Brian Norris Tested-by: Luca Weiss Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220422100824.v4.1.I484f4ee35609f78b932bd50feed639c29e64997e@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index e7ea45386c22f..efa95dc4fc4ee 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1384,13 +1384,17 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + /* + * Bump to HS timing and frequency. Some cards don't handle + * SEND_STATUS reliably at the initial frequency. + */ mmc_set_timing(host, MMC_TIMING_MMC_HS); + mmc_set_bus_speed(card); + err = mmc_switch_status(card, true); if (err) goto out_err; - mmc_set_clock(host, card->ext_csd.hs_max_dtr); - /* Switch card to DDR with strobe bit */ val = EXT_CSD_DDR_BUS_WIDTH_8 | EXT_CSD_BUS_WIDTH_STROBE; err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, @@ -1448,7 +1452,7 @@ static int mmc_select_hs400es(struct mmc_card *card) static int mmc_select_hs200(struct mmc_card *card) { struct mmc_host *host = card->host; - unsigned int old_timing, old_signal_voltage; + unsigned int old_timing, old_signal_voltage, old_clock; int err = -EINVAL; u8 val; @@ -1479,8 +1483,17 @@ static int mmc_select_hs200(struct mmc_card *card) false, true, MMC_CMD_RETRIES); if (err) goto err; + + /* + * Bump to HS timing and frequency. Some cards don't handle + * SEND_STATUS reliably at the initial frequency. + * NB: We can't move to full (HS200) speeds until after we've + * successfully switched over. + */ old_timing = host->ios.timing; + old_clock = host->ios.clock; mmc_set_timing(host, MMC_TIMING_MMC_HS200); + mmc_set_clock(card->host, card->ext_csd.hs_max_dtr); /* * For HS200, CRC errors are not a reliable way to know the @@ -1493,8 +1506,10 @@ static int mmc_select_hs200(struct mmc_card *card) * mmc_select_timing() assumes timing has not changed if * it is a switch error. */ - if (err == -EBADMSG) + if (err == -EBADMSG) { + mmc_set_clock(host, old_clock); mmc_set_timing(host, old_timing); + } } err: if (err) { From aa22125c57f9e577f0a667e4fa07fc3fa8ca1e60 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 23 Apr 2022 14:12:39 +0100 Subject: [PATCH 057/491] ASoC: ops: Validate input values in snd_soc_put_volsw_range() Check that values written via snd_soc_put_volsw_range() are within the range advertised by the control, ensuring that we don't write out of spec values to the hardware. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220423131239.3375261-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 58347eadd219b..e693070f51fe8 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -519,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int err, ret; + int err, ret, tmp; + + tmp = ucontrol->value.integer.value[0]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min + 1) + return -EINVAL; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -534,6 +542,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { + tmp = ucontrol->value.integer.value[1]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min + 1) + return -EINVAL; + if (invert) val = (max - ucontrol->value.integer.value[1]) & mask; else From 8b202ee218395319aec1ef44f72043e1fbaccdd6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 25 Apr 2022 14:17:42 +0200 Subject: [PATCH 058/491] s390: disable -Warray-bounds gcc-12 shows a lot of array bound warnings on s390. This is caused by the S390_lowcore macro which uses a hardcoded address of 0. Wrapping that with absolute_pointer() works, but gcc no longer knows that a 12 bit displacement is sufficient to access lowcore. So it emits instructions like 'lghi %r1,0; l %rx,xxx(%r1)' instead of a single load/store instruction. As s390 stores variables often read/written in lowcore, this is considered problematic. Therefore disable -Warray-bounds on s390 for gcc-12 for the time being, until there is a better solution. Signed-off-by: Sven Schnelle Link: https://lore.kernel.org/r/yt9dzgkelelc.fsf@linux.ibm.com Link: https://lore.kernel.org/r/20220422134308.1613610-1-svens@linux.ibm.com Link: https://lore.kernel.org/r/20220425121742.3222133-1-svens@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e441b60b1812c..df325eacf62d2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -30,6 +30,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) + +ifdef CONFIG_CC_IS_GCC + ifeq ($(call cc-ifversion, -ge, 1200, y), y) + ifeq ($(call cc-ifversion, -lt, 1300, y), y) + KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) + KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds) + endif + endif +endif + UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ From 08da09f028043fed9653331ae75bc310411f72e6 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Tue, 26 Apr 2022 20:51:09 -0700 Subject: [PATCH 059/491] hwmon: (pmbus) delta-ahe50dc-fan: work around hardware quirk CLEAR_FAULTS commands can apparently sometimes trigger catastrophic power output glitches on the ahe-50dc, so block them from being sent at all. Signed-off-by: Zev Weiss Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220427035109.3819-1-zev@bewilderbeest.net Fixes: d387d88ed045 ("hwmon: (pmbus) Add Delta AHE-50DC fan control module driver") Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/delta-ahe50dc-fan.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c index 40dffd9c4cbfc..f546f0c12497b 100644 --- a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c +++ b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c @@ -14,6 +14,21 @@ #define AHE50DC_PMBUS_READ_TEMP4 0xd0 +static int ahe50dc_fan_write_byte(struct i2c_client *client, int page, u8 value) +{ + /* + * The CLEAR_FAULTS operation seems to sometimes (unpredictably, perhaps + * 5% of the time or so) trigger a problematic phenomenon in which the + * fan speeds surge momentarily and at least some (perhaps all?) of the + * system's power outputs experience a glitch. + * + * However, according to Delta it should be OK to simply not send any + * CLEAR_FAULTS commands (the device doesn't seem to be capable of + * reporting any faults anyway), so just blackhole them unconditionally. + */ + return value == PMBUS_CLEAR_FAULTS ? -EOPNOTSUPP : -ENODATA; +} + static int ahe50dc_fan_read_word_data(struct i2c_client *client, int page, int phase, int reg) { /* temp1 in (virtual) page 1 is remapped to mfr-specific temp4 */ @@ -68,6 +83,7 @@ static struct pmbus_driver_info ahe50dc_fan_info = { PMBUS_HAVE_VIN | PMBUS_HAVE_FAN12 | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_STATUS_FAN34 | PMBUS_PAGE_VIRTUAL, .func[1] = PMBUS_HAVE_TEMP | PMBUS_PAGE_VIRTUAL, + .write_byte = ahe50dc_fan_write_byte, .read_word_data = ahe50dc_fan_read_word_data, }; From c61711c1c95791850be48dd65a1d72eb34ba719f Mon Sep 17 00:00:00 2001 From: Ajit Kumar Pandey Date: Tue, 26 Apr 2022 13:33:57 -0500 Subject: [PATCH 060/491] ASoC: SOF: Fix NULL pointer exception in sof_pci_probe callback We are accessing "desc->ops" in sof_pci_probe without checking "desc" pointer. This results in NULL pointer exception if pci_id->driver_data i.e desc pointer isn't defined in sof device probe: BUG: kernel NULL pointer dereference, address: 0000000000000060 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:sof_pci_probe+0x1e/0x17f [snd_sof_pci] Code: Unable to access opcode bytes at RIP 0xffffffffc043dff4. RSP: 0018:ffffac4b03b9b8d8 EFLAGS: 00010246 Add NULL pointer check for sof_dev_desc pointer to avoid such exception. Reviewed-by: Ranjani Sridharan Signed-off-by: Ajit Kumar Pandey Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220426183357.102155-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 12f5cff224486..7fa2649e56e57 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -153,6 +153,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) dev_dbg(&pci->dev, "PCI DSP detected"); + if (!desc) { + dev_err(dev, "error: no matching PCI descriptor\n"); + return -ENODEV; + } + if (!desc->ops) { dev_err(dev, "error: no matching PCI descriptor ops\n"); return -ENODEV; From 3f65b1e2f424f44585bd701024a3bfd0b1e0ade2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 26 Apr 2022 14:12:14 -0700 Subject: [PATCH 061/491] drm/msm/dp: remove fail safe mode related code Current DP driver implementation has adding safe mode done at dp_hpd_plug_handle() which is expected to be executed under event thread context. However there is possible circular locking happen (see blow stack trace) after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which is executed under drm_thread context. After review all possibilities methods and as discussed on https://patchwork.freedesktop.org/patch/483155/, supporting EDID compliance tests in the driver is quite hacky. As seen with other vendor drivers, supporting these will be much easier with IGT. Hence removing all the related fail safe code for it so that no possibility of circular lock will happen. Reviewed-by: Stephen Boyd Reviewed-by: Douglas Anderson Reviewed-by: Dmitry Baryshkov ====================================================== WARNING: possible circular locking dependency detected 5.15.35-lockdep #6 Tainted: G W ------------------------------------------------------ frecon/429 is trying to acquire lock: ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at: dp_panel_add_fail_safe_mode+0x4c/0xa0 but task is already holding lock: ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at: lock_crtcs+0xb4/0x124 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}: __mutex_lock_common+0x174/0x1a64 mutex_lock_nested+0x98/0xac lock_crtcs+0xb4/0x124 msm_atomic_commit_tail+0x330/0x748 commit_tail+0x19c/0x278 drm_atomic_helper_commit+0x1dc/0x1f0 drm_atomic_commit+0xc0/0xd8 drm_atomic_helper_set_config+0xb4/0x134 drm_mode_setcrtc+0x688/0x1248 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}: __mutex_lock_common+0x174/0x1a64 ww_mutex_lock+0xb8/0x278 modeset_lock+0x304/0x4ac drm_modeset_lock+0x4c/0x7c drmm_mode_config_init+0x4a8/0xc50 msm_drm_init+0x274/0xac0 msm_drm_bind+0x20/0x2c try_to_bring_up_master+0x3dc/0x470 __component_add+0x18c/0x3c0 component_add+0x1c/0x28 dp_display_probe+0x954/0xa98 platform_probe+0x124/0x15c really_probe+0x1b0/0x5f8 __driver_probe_device+0x174/0x20c driver_probe_device+0x70/0x134 __device_attach_driver+0x130/0x1d0 bus_for_each_drv+0xfc/0x14c __device_attach+0x1bc/0x2bc device_initial_probe+0x1c/0x28 bus_probe_device+0x94/0x178 deferred_probe_work_func+0x1a4/0x1f0 process_one_work+0x5d4/0x9dc worker_thread+0x898/0xccc kthread+0x2d4/0x3d4 ret_from_fork+0x10/0x20 -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}: ww_acquire_init+0x1c4/0x2c8 drm_modeset_acquire_init+0x44/0xc8 drm_helper_probe_single_connector_modes+0xb0/0x12dc drm_mode_getconnector+0x5dc/0xfe8 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}: __lock_acquire+0x2650/0x672c lock_acquire+0x1b4/0x4ac __mutex_lock_common+0x174/0x1a64 mutex_lock_nested+0x98/0xac dp_panel_add_fail_safe_mode+0x4c/0xa0 dp_hpd_plug_handle+0x1f0/0x280 dp_bridge_enable+0x94/0x2b8 drm_atomic_bridge_chain_enable+0x11c/0x168 drm_atomic_helper_commit_modeset_enables+0x500/0x740 msm_atomic_commit_tail+0x3e4/0x748 commit_tail+0x19c/0x278 drm_atomic_helper_commit+0x1dc/0x1f0 drm_atomic_commit+0xc0/0xd8 drm_atomic_helper_set_config+0xb4/0x134 drm_mode_setcrtc+0x688/0x1248 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 Changes in v2: -- re text commit title -- remove all fail safe mode Changes in v3: -- remove dp_panel_add_fail_safe_mode() from dp_panel.h -- add Fixes Changes in v5: -- to=dianders@chromium.org Changes in v6: -- fix Fixes commit ID Fixes: 8b2c181e3dcf ("drm/msm/dp: add fail safe mode outside of event_mutex context") Reported-by: Douglas Anderson Signed-off-by: Kuogee Hsieh Link: https://lore.kernel.org/r/1651007534-31842-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 6 ------ drivers/gpu/drm/msm/dp/dp_panel.c | 11 ----------- drivers/gpu/drm/msm/dp/dp_panel.h | 1 - 3 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index a42732b673490..178b774a5fbd3 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -580,12 +580,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); mutex_unlock(&dp->event_mutex); - /* - * add fail safe mode outside event_mutex scope - * to avoid potiential circular lock with drm thread - */ - dp_panel_add_fail_safe_mode(dp->dp_display.connector); - /* uevent will complete connection part */ return 0; }; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 26c3653c99ec9..26f4b6959c31d 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct drm_connector *connector, return rc; } -void dp_panel_add_fail_safe_mode(struct drm_connector *connector) -{ - /* fail safe edid */ - mutex_lock(&connector->dev->mode_config.mutex); - if (drm_add_modes_noedid(connector, 640, 480)) - drm_set_preferred_mode(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); -} - int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector) { @@ -215,8 +206,6 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, rc = -ETIMEDOUT; goto end; } - - dp_panel_add_fail_safe_mode(connector); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 99739ea679a77..9023e5bb4b8b2 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -59,7 +59,6 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel); int dp_panel_deinit(struct dp_panel *dp_panel); int dp_panel_timing_cfg(struct dp_panel *dp_panel); void dp_panel_dump_regs(struct dp_panel *dp_panel); -void dp_panel_add_fail_safe_mode(struct drm_connector *connector); int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector); u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp, From f9095ac1ba1ce407cecc1df93c05ad4ac504661c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 27 Apr 2022 08:58:02 +0200 Subject: [PATCH 062/491] dt-bindings: ufs: cdns,ufshc: Add power-domains The Cadence UFS controller can be part of power domain (as it is in example DTS of TI J721e UFS Host Controller Glue), so allow such property. Reported-by: Rob Herring Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220427065802.110402-1-krzysztof.kozlowski@linaro.org --- Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml index d227dea368be8..fb45f66d64549 100644 --- a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml +++ b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml @@ -43,6 +43,9 @@ properties: - const: phy_clk - const: ref_clk + power-domains: + maxItems: 1 + reg: maxItems: 1 From e17fd4bf54fb579d03566ab22a02dc03b36f4d06 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Apr 2022 08:35:08 -0500 Subject: [PATCH 063/491] dt-bindings: leds-mt6360: Drop redundant 'unevaluatedProperties' The binding has both 'unevaluatedProperties: false' and 'additionalProperties: false' which is redundant. 'additionalProperties' is the stricter of the two, so drop 'unevaluatedProperties'. Fixes: e05cab34e417 ("dt-bindings: leds: Add bindings for MT6360 LED") Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220426133508.1849580-1-robh@kernel.org --- Documentation/devicetree/bindings/leds/leds-mt6360.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml index b2fe6eb89389a..10f95bf1d666a 100644 --- a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml +++ b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml @@ -43,8 +43,6 @@ patternProperties: - 4 # LED output FLASH1 - 5 # LED output FLASH2 -unevaluatedProperties: false - required: - compatible - "#address-cells" From e6f9d69648029e48b8f97db09368d419b5e2614a Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 15 Apr 2022 16:04:05 +0800 Subject: [PATCH 064/491] btrfs: export a helper for compression hard check inode_can_compress will be used outside of inode.c to check the availability of setting compression flag by xattr. This patch moves this function as an internal helper and renames it to btrfs_inode_can_compress. Reviewed-by: Nikolay Borisov Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 11 +++++++++++ fs/btrfs/inode.c | 15 ++------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 47e72d72f7d0a..32131a5d321b3 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -384,6 +384,17 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) return ret; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode) +{ + if (inode->flags & BTRFS_INODE_NODATACOW || + inode->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + struct btrfs_dio_private { struct inode *inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bac68d8e96f9..673b9259f6c03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -480,17 +480,6 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } -/* - * Check if the inode has flags compatible with compression - */ -static inline bool inode_can_compress(struct btrfs_inode *inode) -{ - if (inode->flags & BTRFS_INODE_NODATACOW || - inode->flags & BTRFS_INODE_NODATASUM) - return false; - return true; -} - /* * Check if the inode needs to be submitted to compression, based on mount * options, defragmentation, properties or heuristics. @@ -500,7 +489,7 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, { struct btrfs_fs_info *fs_info = inode->root->fs_info; - if (!inode_can_compress(inode)) { + if (!btrfs_inode_can_compress(inode)) { WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), KERN_ERR "BTRFS: unexpected compression for ino %llu\n", btrfs_ino(inode)); @@ -2019,7 +2008,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root)); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); - } else if (!inode_can_compress(inode) || + } else if (!btrfs_inode_can_compress(inode) || !inode_need_compress(inode, start, end)) { if (zoned) ret = run_delalloc_zoned(inode, locked_page, start, end, From 0e852ab8974cd2b5946766b2d9baf82c78ace03d Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 15 Apr 2022 16:04:06 +0800 Subject: [PATCH 065/491] btrfs: do not allow compression on nodatacow files Compression and nodatacow are mutually exclusive. A similar issue was fixed by commit f37c563bab429 ("btrfs: add missing check for nocow and compression inode flags"). Besides ioctl, there is another way to enable/disable/reset compression directly via xattr. The following steps will result in a invalid combination. $ touch bar $ chattr +C bar $ lsattr bar ---------------C-- bar $ setfattr -n btrfs.compression -v zstd bar $ lsattr bar --------c------C-- bar To align with the logic in check_fsflags, nocompress will also be unacceptable after this patch, to prevent mix any compression-related options with nodatacow. $ touch bar $ chattr +C bar $ lsattr bar ---------------C-- bar $ setfattr -n btrfs.compression -v zstd bar setfattr: bar: Invalid argument $ setfattr -n btrfs.compression -v no bar setfattr: bar: Invalid argument When both compression and nodatacow are enabled, then btrfs_run_delalloc_range prefers nodatacow and no compression happens. Reported-by: Jayce Lin CC: stable@vger.kernel.org # 5.10.x: e6f9d6964802: btrfs: export a helper for compression hard check CC: stable@vger.kernel.org # 5.10.x Reviewed-by: Filipe Manana Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 16 +++++++++++----- fs/btrfs/props.h | 3 ++- fs/btrfs/xattr.c | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 1a6d2d5b4b333..5a6f87744c282 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -17,7 +17,8 @@ static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); struct prop_handler { struct hlist_node node; const char *xattr_name; - int (*validate)(const char *value, size_t len); + int (*validate)(const struct btrfs_inode *inode, const char *value, + size_t len); int (*apply)(struct inode *inode, const char *value, size_t len); const char *(*extract)(struct inode *inode); int inheritable; @@ -55,7 +56,8 @@ find_prop_handler(const char *name, return NULL; } -int btrfs_validate_prop(const char *name, const char *value, size_t value_len) +int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, + const char *value, size_t value_len) { const struct prop_handler *handler; @@ -69,7 +71,7 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len) if (value_len == 0) return 0; - return handler->validate(value, value_len); + return handler->validate(inode, value, value_len); } int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, @@ -252,8 +254,12 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) return ret; } -static int prop_compression_validate(const char *value, size_t len) +static int prop_compression_validate(const struct btrfs_inode *inode, + const char *value, size_t len) { + if (!btrfs_inode_can_compress(inode)) + return -EINVAL; + if (!value) return 0; @@ -364,7 +370,7 @@ static int inherit_props(struct btrfs_trans_handle *trans, * This is not strictly necessary as the property should be * valid, but in case it isn't, don't propagate it further. */ - ret = h->validate(value, strlen(value)); + ret = h->validate(BTRFS_I(inode), value, strlen(value)); if (ret) continue; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 40b2c65b518c6..2b2ac15ab7882 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -13,7 +13,8 @@ void __init btrfs_props_init(void); int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const char *value, size_t value_len, int flags); -int btrfs_validate_prop(const char *name, const char *value, size_t value_len); +int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, + const char *value, size_t value_len); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 99abf41b89b92..9632d0ff2038a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -403,7 +403,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, struct btrfs_root *root = BTRFS_I(inode)->root; name = xattr_full_name(handler, name); - ret = btrfs_validate_prop(name, value, size); + ret = btrfs_validate_prop(BTRFS_I(inode), name, value, size); if (ret) return ret; From d0e64a981fd841cb0f28fcd6afcac55e6f1e6994 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 10:56:39 +0100 Subject: [PATCH 066/491] btrfs: always log symlinks in full mode On Linux, empty symlinks are invalid, and attempting to create one with the system call symlink(2) results in an -ENOENT error and this is explicitly documented in the man page. If we rename a symlink that was created in the current transaction and its parent directory was logged before, we actually end up logging the symlink without logging its content, which is stored in an inline extent. That means that after a power failure we can end up with an empty symlink, having no content and an i_size of 0 bytes. It can be easily reproduced like this: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/testdir $ sync # Create a file inside the directory and fsync the directory. $ touch /mnt/testdir/foo $ xfs_io -c "fsync" /mnt/testdir # Create a symlink inside the directory and then rename the symlink. $ ln -s /mnt/testdir/foo /mnt/testdir/bar $ mv /mnt/testdir/bar /mnt/testdir/baz # Now fsync again the directory, this persist the log tree. $ xfs_io -c "fsync" /mnt/testdir $ mount /dev/sdc /mnt $ stat -c %s /mnt/testdir/baz 0 $ readlink /mnt/testdir/baz $ Fix this by always logging symlinks in full mode (LOG_INODE_ALL), so that their content is also logged. A test case for fstests will follow. CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 09e4f1a04e6fb..11399c8eed87d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5804,6 +5804,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&inode->log_mutex); } + /* + * For symlinks, we must always log their content, which is stored in an + * inline extent, otherwise we could end up with an empty symlink after + * log replay, which is invalid on linux (symlink(2) returns -ENOENT if + * one attempts to create an empty symlink). + * We don't need to worry about flushing delalloc, because when we create + * the inline extent when the symlink is created (we never have delalloc + * for symlinks). + */ + if (S_ISLNK(inode->vfs_inode.i_mode)) + inode_only = LOG_INODE_ALL; + /* * Before logging the inode item, cache the value returned by * inode_logged(), because after that we have the need to figure out if @@ -6182,7 +6194,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, } ctx->log_new_dentries = false; - if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) + if (type == BTRFS_FT_DIR) log_mode = LOG_INODE_ALL; ret = btrfs_log_inode(trans, BTRFS_I(di_inode), log_mode, ctx); From 193b4e83986d7ee6caa8ceefb5ee9f58240fbee0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 11:03:09 +0100 Subject: [PATCH 067/491] btrfs: do not BUG_ON() on failure to update inode when setting xattr We are doing a BUG_ON() if we fail to update an inode after setting (or clearing) a xattr, but there's really no reason to not instead simply abort the transaction and return the error to the caller. This should be a rare error because we have previously reserved enough metadata space to update the inode and the delayed inode should have already been setup, so an -ENOSPC or -ENOMEM, which are the possible errors, are very unlikely to happen. So replace the BUG_ON()s with a transaction abort. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 9632d0ff2038a..367bb87b66df3 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -262,7 +262,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, ret); out: if (start_trans) btrfs_end_transaction(trans); @@ -416,7 +417,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, ret); } btrfs_end_transaction(trans); From 4b73c55fdebd8939f0f6000921075f7f6fa41397 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 11:01:22 +0100 Subject: [PATCH 068/491] btrfs: skip compression property for anything other than files and dirs The compression property only has effect on regular files and directories (so that it's propagated to files and subdirectories created inside a directory). For any other inode type (symlink, fifo, device, socket), it's pointless to set the compression property because it does nothing and ends up unnecessarily wasting leaf space due to the pointless xattr (75 or 76 bytes, depending on the compression value). Symlinks in particular are very common (for example, I have almost 10k symlinks under /etc, /usr and /var alone) and therefore it's worth to avoid wasting leaf space with the compression xattr. For example, the compression property can end up on a symlink or character device implicitly, through inheritance from a parent directory $ mkdir /mnt/testdir $ btrfs property set /mnt/testdir compression lzo $ ln -s yadayada /mnt/testdir/lnk $ mknod /mnt/testdir/dev c 0 0 Or explicitly like this: $ ln -s yadayda /mnt/lnk $ setfattr -h -n btrfs.compression -v lzo /mnt/lnk So skip the compression property on inodes that are neither a regular file nor a directory. CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/props.h | 1 + fs/btrfs/xattr.c | 3 +++ 3 files changed, 47 insertions(+) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 5a6f87744c282..1b31481f9e72c 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -21,6 +21,7 @@ struct prop_handler { size_t len); int (*apply)(struct inode *inode, const char *value, size_t len); const char *(*extract)(struct inode *inode); + bool (*ignore)(const struct btrfs_inode *inode); int inheritable; }; @@ -74,6 +75,28 @@ int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, return handler->validate(inode, value, value_len); } +/* + * Check if a property should be ignored (not set) for an inode. + * + * @inode: The target inode. + * @name: The property's name. + * + * The caller must be sure the given property name is valid, for example by + * having previously called btrfs_validate_prop(). + * + * Returns: true if the property should be ignored for the given inode + * false if the property must not be ignored for the given inode + */ +bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name) +{ + const struct prop_handler *handler; + + handler = find_prop_handler(name, NULL); + ASSERT(handler != NULL); + + return handler->ignore(inode); +} + int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const char *value, size_t value_len, int flags) @@ -316,6 +339,22 @@ static int prop_compression_apply(struct inode *inode, const char *value, return 0; } +static bool prop_compression_ignore(const struct btrfs_inode *inode) +{ + /* + * Compression only has effect for regular files, and for directories + * we set it just to propagate it to new files created inside them. + * Everything else (symlinks, devices, sockets, fifos) is pointless as + * it will do nothing, so don't waste metadata space on a compression + * xattr for anything that is neither a file nor a directory. + */ + if (!S_ISREG(inode->vfs_inode.i_mode) && + !S_ISDIR(inode->vfs_inode.i_mode)) + return true; + + return false; +} + static const char *prop_compression_extract(struct inode *inode) { switch (BTRFS_I(inode)->prop_compress) { @@ -336,6 +375,7 @@ static struct prop_handler prop_handlers[] = { .validate = prop_compression_validate, .apply = prop_compression_apply, .extract = prop_compression_extract, + .ignore = prop_compression_ignore, .inheritable = 1 }, }; @@ -362,6 +402,9 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (!h->inheritable) continue; + if (h->ignore(BTRFS_I(inode))) + continue; + value = h->extract(parent); if (!value) continue; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 2b2ac15ab7882..59bea741cfcf4 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -15,6 +15,7 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, int flags); int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, const char *value, size_t value_len); +bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 367bb87b66df3..85691dc2232fa 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -408,6 +408,9 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, if (ret) return ret; + if (btrfs_ignore_prop(BTRFS_I(inode), name)) + return 0; + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) return PTR_ERR(trans); From 59bf3557cf2f8a469a554aea1e3d2c8e72a579f7 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Sun, 10 Apr 2022 09:35:33 +0800 Subject: [PATCH 069/491] iommu/vt-d: Calculate mask for non-aligned flushes Calculate the appropriate mask for non-size-aligned page selective invalidation. Since psi uses the mask value to mask out the lower order bits of the target address, properly flushing the iotlb requires using a mask value such that [pfn, pfn+pages) all lie within the flushed size-aligned region. This is not normally an issue because iova.c always allocates iovas that are aligned to their size. However, iovas which come from other sources (e.g. userspace via VFIO) may not be aligned. To properly flush the IOTLB, both the start and end pfns need to be equal after applying the mask. That means that the most efficient mask to use is the index of the lowest bit that is equal where all higher bits are also equal. For example, if pfn=0x17f and pages=3, then end_pfn=0x181, so the smallest mask we can use is 8. Any differences above the highest bit of pages are due to carrying, so by xnor'ing pfn and end_pfn and then masking out the lower order bits based on pages, we get 0xffffff00, where the first set bit is the mask we want to use. Fixes: 6fe1010d6d9c ("vfio/type1: DMA unmap chunking") Cc: stable@vger.kernel.org Signed-off-by: David Stevens Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220401022430.1262215-1-stevensd@google.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220410013533.3959168-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index df5c62ecf942b..0ea47e17b379e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1588,7 +1588,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, unsigned long pfn, unsigned int pages, int ih, int map) { - unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned int mask = ilog2(aligned_pages); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain->iommu_did[iommu->seq_id]; @@ -1600,10 +1601,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (domain_use_first_level(domain)) { qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { + unsigned long bitmask = aligned_pages - 1; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + /* * Fallback to domain selective flush if no PSI support or - * the size is too big. PSI requires page size to be 2 ^ x, - * and the base address is naturally aligned to the size. + * the size is too big. */ if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) From da8669ff41fa31573375c9a4180f5c080677204b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 23 Apr 2022 16:23:30 +0800 Subject: [PATCH 070/491] iommu/vt-d: Drop stop marker messages The page fault handling framework in the IOMMU core explicitly states that it doesn't handle PCI PASID Stop Marker and the IOMMU drivers must discard them before reporting faults. This handles Stop Marker messages in prq_event_thread() before reporting events to the core. The VT-d driver explicitly drains the pending page requests when a CPU page table (represented by a mm struct) is unbound from a PASID according to the procedures defined in the VT-d spec. The Stop Marker messages do not need a response. Hence, it is safe to drop the Stop Marker messages silently if any of them is found in the page request queue. Fixes: d5b9e4bfe0d88 ("iommu/vt-d: Report prq to io-pgfault framework") Signed-off-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220421113558.3504874-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220423082330.3897867-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 23a38763c1d1f..7ee37d996e157 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -757,6 +757,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) goto bad_req; } + /* Drop Stop Marker message. No need for a response. */ + if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) + goto prq_advance; + if (!svm || svm->pasid != req->pasid) { /* * It can't go away, because the driver is not permitted From a15932f4377062364d22096afe25bc579134a1c3 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 25 Apr 2022 17:08:26 +0800 Subject: [PATCH 071/491] iommu/dart: check return value after calling platform_get_resource() It will cause null-ptr-deref in resource_size(), if platform_get_resource() returns NULL, move calling resource_size() after devm_ioremap_resource() that will check 'res' to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Fixes: 46d1fb072e76 ("iommu/dart: Add DART iommu driver") Signed-off-by: Yang Yingliang Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20220425090826.2532165-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index decafb07ad083..15b77f16cfa31 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -859,16 +859,15 @@ static int apple_dart_probe(struct platform_device *pdev) dart->dev = dev; spin_lock_init(&dart->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(dart->regs)) + return PTR_ERR(dart->regs); + if (resource_size(res) < 0x4000) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } - dart->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(dart->regs)) - return PTR_ERR(dart->regs); - dart->irq = platform_get_irq(pdev, 0); if (dart->irq < 0) return -ENODEV; From aad41a7d7cf6c6fa804c872a2480f8e541da37cf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Apr 2022 11:08:13 -0400 Subject: [PATCH 072/491] SUNRPC: Don't leak sockets in xs_local_connect() If there is still a closed socket associated with the transport, then we need to trigger an autoclose before we can set up a new connection. Reported-by: wanghai (M) Fixes: f00432063db1 ("SUNRPC: Ensure we flush any closed sockets before xs_xprt_free()") Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ab64ea46870a..f9849b297ea39 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1950,6 +1950,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; + if (transport->file) + goto force_disconnect; + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the @@ -1962,11 +1965,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) */ task->tk_rpc_status = -ENOTCONN; rpc_exit(task, -ENOTCONN); - return; + goto out_wake; } ret = xs_local_setup_socket(transport); if (ret && !RPC_IS_SOFTCONN(task)) msleep_interruptible(15000); + return; +force_disconnect: + xprt_force_disconnect(xprt); +out_wake: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) From 2c33d775ef4c25c0e1e1cc0fd5496d02f76bfa20 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Thu, 28 Apr 2022 08:24:32 +0200 Subject: [PATCH 073/491] timekeeping: Mark NMI safe time accessors as notrace Mark the CLOCK_MONOTONIC fast time accessors as notrace. These functions are used in tracing to retrieve timestamps, so they should not recurse. Fixes: 4498e7467e9e ("time: Parametrize all tk_fast_mono users") Fixes: f09cb9a1808e ("time: Introduce tk_fast_raw") Reported-by: Steven Rostedt Signed-off-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220426175338.3807ca4f@gandalf.local.home/ Link: https://lore.kernel.org/r/20220428062432.61063-1-kurt@linutronix.de --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dcdcb85121e40..3b1398fbddaf8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -482,7 +482,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 ktime_get_mono_fast_ns(void) +u64 notrace ktime_get_mono_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_mono); } @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); * Contrary to ktime_get_mono_fast_ns() this is always correct because the * conversion factor is not affected by NTP/PTP correction. */ -u64 ktime_get_raw_fast_ns(void) +u64 notrace ktime_get_raw_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_raw); } From bb300130e47fcefbe938f06dbacaef0312e28416 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Wed, 27 Apr 2022 14:16:19 +0300 Subject: [PATCH 074/491] ath11k: reduce the wait time of 11d scan and hw scan while add interface (cherry picked from commit 1f682dc9fb3790aa7ec27d3d122ff32b1eda1365 in wireless-next) Currently ath11k will wait 11d scan complete while add interface in ath11k_mac_op_add_interface(), when system resume without enable wowlan, ath11k_mac_op_add_interface() is called for each resume, thus it increase the resume time of system. And ath11k_mac_op_hw_scan() after ath11k_mac_op_add_interface() also needs some time cost because the previous 11d scan need more than 5 seconds when 6 GHz is enabled, then the scan started event will indicated to ath11k after the 11d scan completed. While 11d scan/hw scan is running in firmware, if ath11k update channel list to firmware by WMI_SCAN_CHAN_LIST_CMDID, then firmware will cancel the current scan which is running, it lead the scan failed. The patch commit 9dcf6808b253 ("ath11k: add 11d scan offload support") used finish_11d_scan/finish_11d_ch_list/pending_11d to synchronize the 11d scan/hw scan/channel list between ath11k/firmware/mac80211 and to avoid the scan fail. Add wait operation before ath11k update channel list, function ath11k_reg_update_chan_list() will wait until the current 11d scan/hw scan completed. And remove the wait operation of start 11d scan and waiting channel list complete in hw scan. After these changes, resume time cost reduce about 5 seconds and also hw scan time cost reduced obviously, and scan failed not seen. The 11d scan is sent to firmware only one time for each interface added in mac.c, and it is moved after the 1st hw scan because 11d scan will cost some time and thus leads the AP scan result update to UI delay. Currently priority of ath11k's hw scan is WMI_SCAN_PRIORITY_LOW, and priority of 11d scan in firmware is WMI_SCAN_PRIORITY_MEDIUM, then the 11d scan which sent after hw scan will cancel the hw scan in firmware, so change the priority to WMI_SCAN_PRIORITY_MEDIUM for the hw scan which is in front of the 11d scan, thus it will not happen scan cancel in firmware. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 Fixes: 9dcf6808b253 ("ath11k: add 11d scan offload support") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215777 Cc: Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220328035832.14122-1-quic_wgong@quicinc.com Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220427111619.9758-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/core.c | 1 + drivers/net/wireless/ath/ath11k/core.h | 13 +++-- drivers/net/wireless/ath/ath11k/mac.c | 71 +++++++++++--------------- drivers/net/wireless/ath/ath11k/mac.h | 2 +- drivers/net/wireless/ath/ath11k/reg.c | 43 ++++++++++------ drivers/net/wireless/ath/ath11k/reg.h | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 16 +++++- 7 files changed, 84 insertions(+), 64 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 71eb7d04c3bf2..90a5df1fbdbd2 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -1288,6 +1288,7 @@ static void ath11k_core_restart(struct work_struct *work) ieee80211_stop_queues(ar->hw); ath11k_mac_drain_tx(ar); + complete(&ar->completed_11d_scan); complete(&ar->scan.started); complete(&ar->scan.completed); complete(&ar->peer_assoc_done); diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index c0228e91a596b..b8634eddf49aa 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -38,6 +38,8 @@ extern unsigned int ath11k_frame_mode; +#define ATH11K_SCAN_TIMEOUT_HZ (20 * HZ) + #define ATH11K_MON_TIMER_INTERVAL 10 enum ath11k_supported_bw { @@ -189,6 +191,12 @@ enum ath11k_scan_state { ATH11K_SCAN_ABORTING, }; +enum ath11k_11d_state { + ATH11K_11D_IDLE, + ATH11K_11D_PREPARING, + ATH11K_11D_RUNNING, +}; + enum ath11k_dev_flags { ATH11K_CAC_RUNNING, ATH11K_FLAG_CORE_REGISTERED, @@ -607,9 +615,8 @@ struct ath11k { bool dfs_block_radar_events; struct ath11k_thermal thermal; u32 vdev_id_11d_scan; - struct completion finish_11d_scan; - struct completion finish_11d_ch_list; - bool pending_11d; + struct completion completed_11d_scan; + enum ath11k_11d_state state_11d; bool regdom_set_by_user; int hw_rate_code; u8 twt_enabled; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e6b34b0d61bd3..58ff761393db1 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3601,26 +3601,6 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, if (ret) goto exit; - /* Currently the pending_11d=true only happened 1 time while - * wlan interface up in ath11k_mac_11d_scan_start(), it is called by - * ath11k_mac_op_add_interface(), after wlan interface up, - * pending_11d=false always. - * If remove below wait, it always happened scan fail and lead connect - * fail while wlan interface up, because it has a 11d scan which is running - * in firmware, and lead this scan failed. - */ - if (ar->pending_11d) { - long time_left; - unsigned long timeout = 5 * HZ; - - if (ar->supports_6ghz) - timeout += 5 * HZ; - - time_left = wait_for_completion_timeout(&ar->finish_11d_ch_list, timeout); - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, - "mac wait 11d channel list time left %ld\n", time_left); - } - memset(&arg, 0, sizeof(arg)); ath11k_wmi_start_scan_init(ar, &arg); arg.vdev_id = arvif->vdev_id; @@ -3686,6 +3666,10 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, kfree(arg.extraie.ptr); mutex_unlock(&ar->conf_mutex); + + if (ar->state_11d == ATH11K_11D_PREPARING) + ath11k_mac_11d_scan_start(ar, arvif->vdev_id); + return ret; } @@ -5814,7 +5798,7 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw) /* TODO: Do we need to enable ANI? */ - ath11k_reg_update_chan_list(ar); + ath11k_reg_update_chan_list(ar, false); ar->num_started_vdevs = 0; ar->num_created_vdevs = 0; @@ -5881,6 +5865,11 @@ static void ath11k_mac_op_stop(struct ieee80211_hw *hw) cancel_work_sync(&ar->ab->update_11d_work); cancel_work_sync(&ar->ab->rfkill_work); + if (ar->state_11d == ATH11K_11D_PREPARING) { + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) { list_del(&ppdu_stats->list); @@ -6051,7 +6040,7 @@ static bool ath11k_mac_vif_ap_active_any(struct ath11k_base *ab) return false; } -void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait) +void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id) { struct wmi_11d_scan_start_params param; int ret; @@ -6079,28 +6068,22 @@ void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait) ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac start 11d scan\n"); - if (wait) - reinit_completion(&ar->finish_11d_scan); - ret = ath11k_wmi_send_11d_scan_start_cmd(ar, ¶m); if (ret) { ath11k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n", vdev_id, ret); } else { ar->vdev_id_11d_scan = vdev_id; - if (wait) { - ar->pending_11d = true; - ret = wait_for_completion_timeout(&ar->finish_11d_scan, - 5 * HZ); - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, - "mac 11d scan left time %d\n", ret); - - if (!ret) - ar->pending_11d = false; - } + if (ar->state_11d == ATH11K_11D_PREPARING) + ar->state_11d = ATH11K_11D_RUNNING; } fin: + if (ar->state_11d == ATH11K_11D_PREPARING) { + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + mutex_unlock(&ar->ab->vdev_id_11d_lock); } @@ -6123,12 +6106,15 @@ void ath11k_mac_11d_scan_stop(struct ath11k *ar) vdev_id = ar->vdev_id_11d_scan; ret = ath11k_wmi_send_11d_scan_stop_cmd(ar, vdev_id); - if (ret) + if (ret) { ath11k_warn(ar->ab, "failed to stopt 11d scan vdev %d ret: %d\n", vdev_id, ret); - else + } else { ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID; + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } } mutex_unlock(&ar->ab->vdev_id_11d_lock); } @@ -6324,8 +6310,10 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw, goto err_peer_del; } - ath11k_mac_11d_scan_start(ar, arvif->vdev_id, true); - + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map)) { + reinit_completion(&ar->completed_11d_scan); + ar->state_11d = ATH11K_11D_PREPARING; + } break; case WMI_VDEV_TYPE_MONITOR: set_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags); @@ -7190,7 +7178,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, } if (arvif->vdev_type == WMI_VDEV_TYPE_STA) - ath11k_mac_11d_scan_start(ar, arvif->vdev_id, false); + ath11k_mac_11d_scan_start(ar, arvif->vdev_id); mutex_unlock(&ar->conf_mutex); } @@ -8671,8 +8659,7 @@ int ath11k_mac_allocate(struct ath11k_base *ab) ar->monitor_vdev_id = -1; clear_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags); ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID; - init_completion(&ar->finish_11d_scan); - init_completion(&ar->finish_11d_ch_list); + init_completion(&ar->completed_11d_scan); } return 0; diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index 0e6c870b09c88..29b523af66dd2 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -130,7 +130,7 @@ extern const struct htt_rx_ring_tlv_filter ath11k_mac_mon_status_filter_default; #define ATH11K_SCAN_11D_INTERVAL 600000 #define ATH11K_11D_INVALID_VDEV_ID 0xFFFF -void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait); +void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id); void ath11k_mac_11d_scan_stop(struct ath11k *ar); void ath11k_mac_11d_scan_stop_all(struct ath11k_base *ab); diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 81e11cde31d7b..80a6977713932 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -102,7 +102,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) ar->regdom_set_by_user = true; } -int ath11k_reg_update_chan_list(struct ath11k *ar) +int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait) { struct ieee80211_supported_band **bands; struct scan_chan_list_params *params; @@ -111,7 +111,32 @@ int ath11k_reg_update_chan_list(struct ath11k *ar) struct channel_param *ch; enum nl80211_band band; int num_channels = 0; - int i, ret; + int i, ret, left; + + if (wait && ar->state_11d != ATH11K_11D_IDLE) { + left = wait_for_completion_timeout(&ar->completed_11d_scan, + ATH11K_SCAN_TIMEOUT_HZ); + if (!left) { + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "failed to receive 11d scan complete: timed out\n"); + ar->state_11d = ATH11K_11D_IDLE; + } + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "reg 11d scan wait left time %d\n", left); + } + + if (wait && + (ar->scan.state == ATH11K_SCAN_STARTING || + ar->scan.state == ATH11K_SCAN_RUNNING)) { + left = wait_for_completion_timeout(&ar->scan.completed, + ATH11K_SCAN_TIMEOUT_HZ); + if (!left) + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "failed to receive hw scan complete: timed out\n"); + + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "reg hw scan wait left time %d\n", left); + } bands = hw->wiphy->bands; for (band = 0; band < NUM_NL80211_BANDS; band++) { @@ -193,11 +218,6 @@ int ath11k_reg_update_chan_list(struct ath11k *ar) ret = ath11k_wmi_send_scan_chan_list_cmd(ar, params); kfree(params); - if (ar->pending_11d) { - complete(&ar->finish_11d_ch_list); - ar->pending_11d = false; - } - return ret; } @@ -263,15 +283,8 @@ int ath11k_regd_update(struct ath11k *ar) goto err; } - if (ar->pending_11d) - complete(&ar->finish_11d_scan); - rtnl_lock(); wiphy_lock(ar->hw->wiphy); - - if (ar->pending_11d) - reinit_completion(&ar->finish_11d_ch_list); - ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy); wiphy_unlock(ar->hw->wiphy); rtnl_unlock(); @@ -282,7 +295,7 @@ int ath11k_regd_update(struct ath11k *ar) goto err; if (ar->state == ATH11K_STATE_ON) { - ret = ath11k_reg_update_chan_list(ar); + ret = ath11k_reg_update_chan_list(ar, true); if (ret) goto err; } diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h index 5fb9dc03a74e8..2f284f26378d1 100644 --- a/drivers/net/wireless/ath/ath11k/reg.h +++ b/drivers/net/wireless/ath/ath11k/reg.h @@ -32,5 +32,5 @@ struct ieee80211_regdomain * ath11k_reg_build_regd(struct ath11k_base *ab, struct cur_regulatory_info *reg_info, bool intersect); int ath11k_regd_update(struct ath11k *ar); -int ath11k_reg_update_chan_list(struct ath11k *ar); +int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index b4f86c45d81f8..2751fe8814df7 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -2015,7 +2015,10 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar, { /* setup commonly used values */ arg->scan_req_id = 1; - arg->scan_priority = WMI_SCAN_PRIORITY_LOW; + if (ar->state_11d == ATH11K_11D_PREPARING) + arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM; + else + arg->scan_priority = WMI_SCAN_PRIORITY_LOW; arg->dwell_time_active = 50; arg->dwell_time_active_2g = 0; arg->dwell_time_passive = 150; @@ -6350,8 +6353,10 @@ static void ath11k_wmi_op_ep_tx_credits(struct ath11k_base *ab) static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *skb) { const struct wmi_11d_new_cc_ev *ev; + struct ath11k *ar; + struct ath11k_pdev *pdev; const void **tb; - int ret; + int ret, i; tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC); if (IS_ERR(tb)) { @@ -6377,6 +6382,13 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s kfree(tb); + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + queue_work(ab->workqueue, &ab->update_11d_work); return 0; From b4e61fc031b11dd807dffc46cebbf0e25966d3d1 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 28 Apr 2022 23:14:43 -0700 Subject: [PATCH 075/491] Revert "mm/memory-failure.c: skip huge_zero_page in memory_failure()" Patch series "mm/memory-failure: rework fix on huge_zero_page splitting". This patch (of 2): This reverts commit d173d5417fb67411e623d394aab986d847e47dad. The commit d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") explicitly skips huge_zero_page in memory_failure(), in order to avoid triggering VM_BUG_ON_PAGE on huge_zero_page in split_huge_page_to_list(). This works, but Yang Shi thinks that, Raising BUG is overkilling for splitting huge_zero_page. The huge_zero_page can't be met from normal paths other than memory failure, but memory failure is a valid caller. So I tend to replace the BUG to WARN + returning -EBUSY. If we don't care about the reason code in memory failure, we don't have to touch memory failure. And for the issue that huge_zero_page will be set PG_has_hwpoisoned, Yang Shi comments that, The anonymous page fault doesn't check if the page is poisoned or not since it typically gets a fresh allocated page and assumes the poisoned page (isolated successfully) can't be reallocated again. But huge zero page and base zero page are reused every time. So no matter what fix we pick, the issue is always there. Finally, Yang, David, Anshuman and Naoya all agree to fix the bug, i.e., to split huge_zero_page, in split_huge_page_to_list(). This reverts the commit d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()"), and the original bug will be fixed by the next patch. Link: https://lkml.kernel.org/r/872cefb182ba1dd686b0e7db1e6b2ebe5a4fff87.1651039624.git.xuyu@linux.alibaba.com Fixes: d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") Fixes: 6a46079cf57a ("HWPOISON: The high level memory error handler in the VM v7") Signed-off-by: Xu Yu Suggested-by: Yang Shi Reviewed-by: Yang Shi Reviewed-by: Miaohe Lin Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 27760c19bad75..2020944398c93 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1860,19 +1860,6 @@ int memory_failure(unsigned long pfn, int flags) } if (PageTransHuge(hpage)) { - /* - * Bail out before SetPageHasHWPoisoned() if hpage is - * huge_zero_page, although PG_has_hwpoisoned is not - * checked in set_huge_zero_page(). - * - * TODO: Handle memory failure of huge_zero_page thoroughly. - */ - if (is_huge_zero_page(hpage)) { - action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - res = -EBUSY; - goto unlock_mutex; - } - /* * The flag must be set after the refcount is bumped * otherwise it may race with THP split. From 478d134e9506c7e9bfe2830ed03dd85e97966313 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 28 Apr 2022 23:14:43 -0700 Subject: [PATCH 076/491] mm/huge_memory: do not overkill when splitting huge_zero_page Kernel panic when injecting memory_failure for the global huge_zero_page, when CONFIG_DEBUG_VM is enabled, as follows. Injecting memory failure for pfn 0x109ff9 at process virtual address 0x20ff9000 page:00000000fb053fc3 refcount:2 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x109e00 head:00000000fb053fc3 order:9 compound_mapcount:0 compound_pincount:0 flags: 0x17fffc000010001(locked|head|node=0|zone=2|lastcpupid=0x1ffff) raw: 017fffc000010001 0000000000000000 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000000000 00000002ffffffff 0000000000000000 page dumped because: VM_BUG_ON_PAGE(is_huge_zero_page(head)) ------------[ cut here ]------------ kernel BUG at mm/huge_memory.c:2499! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 553 Comm: split_bug Not tainted 5.18.0-rc1+ #11 Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 3288b3c 04/01/2014 RIP: 0010:split_huge_page_to_list+0x66a/0x880 Code: 84 9b fb ff ff 48 8b 7c 24 08 31 f6 e8 9f 5d 2a 00 b8 b8 02 00 00 e9 e8 fb ff ff 48 c7 c6 e8 47 3c 82 4c b RSP: 0018:ffffc90000dcbdf8 EFLAGS: 00010246 RAX: 000000000000003c RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff823e4c4f RDI: 00000000ffffffff RBP: ffff88843fffdb40 R08: 0000000000000000 R09: 00000000fffeffff R10: ffffc90000dcbc48 R11: ffffffff82d68448 R12: ffffea0004278000 R13: ffffffff823c6203 R14: 0000000000109ff9 R15: ffffea000427fe40 FS: 00007fc375a26740(0000) GS:ffff88842fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc3757c9290 CR3: 0000000102174006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: try_to_split_thp_page+0x3a/0x130 memory_failure+0x128/0x800 madvise_inject_error.cold+0x8b/0xa1 __x64_sys_madvise+0x54/0x60 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fc3754f8bf9 Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 8 RSP: 002b:00007ffeda93a1d8 EFLAGS: 00000217 ORIG_RAX: 000000000000001c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc3754f8bf9 RDX: 0000000000000064 RSI: 0000000000003000 RDI: 0000000020ff9000 RBP: 00007ffeda93a200 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000ffffffff R11: 0000000000000217 R12: 0000000000400490 R13: 00007ffeda93a2e0 R14: 0000000000000000 R15: 0000000000000000 We think that raising BUG is overkilling for splitting huge_zero_page, the huge_zero_page can't be met from normal paths other than memory failure, but memory failure is a valid caller. So we tend to replace the BUG to WARN + returning -EBUSY, and thus the panic above won't happen again. Link: https://lkml.kernel.org/r/f35f8b97377d5d3ede1bc5ac3114da888c57cbce.1651052574.git.xuyu@linux.alibaba.com Fixes: d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") Fixes: 6a46079cf57a ("HWPOISON: The high level memory error handler in the VM v7") Signed-off-by: Xu Yu Suggested-by: Yang Shi Reported-by: kernel test robot Reviewed-by: Naoya Horiguchi Reviewed-by: Yang Shi Reviewed-by: Miaohe Lin Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c468fee595ffa..910a138e9859e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2495,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) struct address_space *mapping = NULL; int extra_pins, ret; pgoff_t end; + bool is_hzp; - VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(head), head); VM_BUG_ON_PAGE(!PageCompound(head), head); + is_hzp = is_huge_zero_page(head); + VM_WARN_ON_ONCE_PAGE(is_hzp, head); + if (is_hzp) + return -EBUSY; + if (PageWriteback(head)) return -EBUSY; From 1825b93b626e99eb9a0f9f50342c7b2fa201b387 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 28 Apr 2022 23:14:44 -0700 Subject: [PATCH 077/491] mm/hwpoison: use pr_err() instead of dump_page() in get_any_page() The following VM_BUG_ON_FOLIO() is triggered when memory error event happens on the (thp/folio) pages which are about to be freed: [ 1160.232771] page:00000000b36a8a0f refcount:1 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x16a000 [ 1160.236916] page:00000000b36a8a0f refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x16a000 [ 1160.240684] flags: 0x57ffffc0800000(hwpoison|node=1|zone=2|lastcpupid=0x1fffff) [ 1160.243458] raw: 0057ffffc0800000 dead000000000100 dead000000000122 0000000000000000 [ 1160.246268] raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 [ 1160.249197] page dumped because: VM_BUG_ON_FOLIO(!folio_test_large(folio)) [ 1160.251815] ------------[ cut here ]------------ [ 1160.253438] kernel BUG at include/linux/mm.h:788! [ 1160.256162] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 1160.258172] CPU: 2 PID: 115368 Comm: mceinj.sh Tainted: G E 5.18.0-rc1-v5.18-rc1-220404-2353-005-g83111+ #3 [ 1160.262049] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014 [ 1160.265103] RIP: 0010:dump_page.cold+0x27e/0x2bd [ 1160.266757] Code: fe ff ff 48 c7 c6 81 f1 5a 98 e9 4c fe ff ff 48 c7 c6 a1 95 59 98 e9 40 fe ff ff 48 c7 c6 50 bf 5a 98 48 89 ef e8 9d 04 6d ff <0f> 0b 41 f7 c4 ff 0f 00 00 0f 85 9f fd ff ff 49 8b 04 24 a9 00 00 [ 1160.273180] RSP: 0018:ffffaa2c4d59fd18 EFLAGS: 00010292 [ 1160.274969] RAX: 000000000000003e RBX: 0000000000000001 RCX: 0000000000000000 [ 1160.277263] RDX: 0000000000000001 RSI: ffffffff985995a1 RDI: 00000000ffffffff [ 1160.279571] RBP: ffffdc9c45a80000 R08: 0000000000000000 R09: 00000000ffffdfff [ 1160.281794] R10: ffffaa2c4d59fb08 R11: ffffffff98940d08 R12: ffffdc9c45a80000 [ 1160.283920] R13: ffffffff985b6f94 R14: 0000000000000000 R15: ffffdc9c45a80000 [ 1160.286641] FS: 00007eff54ce1740(0000) GS:ffff99c67bd00000(0000) knlGS:0000000000000000 [ 1160.289498] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1160.291106] CR2: 00005628381a5f68 CR3: 0000000104712003 CR4: 0000000000170ee0 [ 1160.293031] Call Trace: [ 1160.293724] [ 1160.294334] get_hwpoison_page+0x47d/0x570 [ 1160.295474] memory_failure+0x106/0xaa0 [ 1160.296474] ? security_capable+0x36/0x50 [ 1160.297524] hard_offline_page_store+0x43/0x80 [ 1160.298684] kernfs_fop_write_iter+0x11c/0x1b0 [ 1160.299829] new_sync_write+0xf9/0x160 [ 1160.300810] vfs_write+0x209/0x290 [ 1160.301835] ksys_write+0x4f/0xc0 [ 1160.302718] do_syscall_64+0x3b/0x90 [ 1160.303664] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 1160.304981] RIP: 0033:0x7eff54b018b7 As shown in the RIP address, this VM_BUG_ON in folio_entire_mapcount() is called from dump_page("hwpoison: unhandlable page") in get_any_page(). The below explains the mechanism of the race: CPU 0 CPU 1 memory_failure get_hwpoison_page get_any_page dump_page compound = PageCompound free_pages_prepare page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP folio_entire_mapcount VM_BUG_ON_FOLIO(!folio_test_large(folio)) So replace dump_page() with safer one, pr_err(). Link: https://lkml.kernel.org/r/20220427053220.719866-1-naoya.horiguchi@linux.dev Fixes: 74e8ee4708a8 ("mm: Turn head_compound_mapcount() into folio_entire_mapcount()") Signed-off-by: Naoya Horiguchi Reviewed-by: John Hubbard Reviewed-by: Miaohe Lin Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: William Kucharski Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2020944398c93..d4a4adcca01f3 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1274,7 +1274,7 @@ static int get_any_page(struct page *p, unsigned long flags) } out: if (ret == -EIO) - dump_page(p, "hwpoison: unhandlable page"); + pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p)); return ret; } From 72ed3ee9fa0b461ad086403a8b5336154bd82234 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 22 Apr 2022 10:23:37 +0200 Subject: [PATCH 078/491] can: isotp: remove re-binding of bound socket As a carry over from the CAN_RAW socket (which allows to change the CAN interface while mantaining the filter setup) the re-binding of the CAN_ISOTP socket needs to take care about CAN ID address information and subscriptions. It turned out that this feature is so limited (e.g. the sockopts remain fix) that it finally has never been needed/used. In opposite to the stateless CAN_RAW socket the switching of the CAN ID subscriptions might additionally lead to an interrupted ongoing PDU reception. So better remove this unneeded complexity. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/20220422082337.1676-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index ff5d7870294e8..1e7c6a460ef9a 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1189,6 +1189,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) lock_sock(sk); + if (so->bound) { + err = -EINVAL; + goto out; + } + /* do not register frame reception for functional addressing */ if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) do_rx_reg = 0; @@ -1199,10 +1204,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out; } - if (so->bound && addr->can_ifindex == so->ifindex && - rx_id == so->rxid && tx_id == so->txid) - goto out; - dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { err = -ENODEV; @@ -1237,22 +1238,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) dev_put(dev); - if (so->bound && do_rx_reg) { - /* unregister old filter */ - if (so->ifindex) { - dev = dev_get_by_index(net, so->ifindex); - if (dev) { - can_rx_unregister(net, dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); - can_rx_unregister(net, dev, so->txid, - SINGLE_MASK(so->txid), - isotp_rcv_echo, sk); - dev_put(dev); - } - } - } - /* switch to new settings */ so->ifindex = ifindex; so->rxid = rx_id; From 47f070a63e735bcc8d481de31be1b5a1aa62b31c Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 25 Apr 2022 12:24:00 +0800 Subject: [PATCH 079/491] can: grcan: grcan_close(): fix deadlock There are deadlocks caused by del_timer_sync(&priv->hang_timer) and del_timer_sync(&priv->rr_timer) in grcan_close(), one of the deadlocks are shown below: (Thread 1) | (Thread 2) | grcan_reset_timer() grcan_close() | mod_timer() spin_lock_irqsave() //(1) | (wait a time) ... | grcan_initiate_running_reset() del_timer_sync() | spin_lock_irqsave() //(2) (wait timer to stop) | ... We hold priv->lock in position (1) of thread 1 and use del_timer_sync() to wait timer to stop, but timer handler also need priv->lock in position (2) of thread 2. As a result, grcan_close() will block forever. This patch extracts del_timer_sync() from the protection of spin_lock_irqsave(), which could let timer handler to obtain the needed lock. Link: https://lore.kernel.org/all/20220425042400.66517-1-duoming@zju.edu.cn Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Cc: stable@vger.kernel.org Signed-off-by: Duoming Zhou Reviewed-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index d0c5a7a60dafb..1189057b5d680 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1102,8 +1102,10 @@ static int grcan_close(struct net_device *dev) priv->closing = true; if (priv->need_txbug_workaround) { + spin_unlock_irqrestore(&priv->lock, flags); del_timer_sync(&priv->hang_timer); del_timer_sync(&priv->rr_timer); + spin_lock_irqsave(&priv->lock, flags); } netif_stop_queue(dev); grcan_stop_hardware(dev); From 101da4268626b00d16356a6bf284d66e44c46ff9 Mon Sep 17 00:00:00 2001 From: Daniel Hellstrom Date: Fri, 29 Apr 2022 10:46:54 +0200 Subject: [PATCH 080/491] can: grcan: use ofdev->dev when allocating DMA memory Use the device of the device tree node should be rather than the device of the struct net_device when allocating DMA buffers. The driver got away with it on sparc32 until commit 53b7670e5735 ("sparc: factor the dma coherent mapping into helper") after which the driver oopses. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-2-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Daniel Hellstrom Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 1189057b5d680..f8860900575be 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -248,6 +248,7 @@ struct grcan_device_config { struct grcan_priv { struct can_priv can; /* must be the first member */ struct net_device *dev; + struct device *ofdev_dev; struct napi_struct napi; struct grcan_registers __iomem *regs; /* ioremap'ed registers */ @@ -921,7 +922,7 @@ static void grcan_free_dma_buffers(struct net_device *dev) struct grcan_priv *priv = netdev_priv(dev); struct grcan_dma *dma = &priv->dma; - dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf, + dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf, dma->base_handle); memset(dma, 0, sizeof(*dma)); } @@ -946,7 +947,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev, /* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */ dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT; - dma->base_buf = dma_alloc_coherent(&dev->dev, + dma->base_buf = dma_alloc_coherent(priv->ofdev_dev, dma->base_size, &dma->base_handle, GFP_KERNEL); @@ -1589,6 +1590,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev, memcpy(&priv->config, &grcan_module_config, sizeof(struct grcan_device_config)); priv->dev = dev; + priv->ofdev_dev = &ofdev->dev; priv->regs = base; priv->can.bittiming_const = &grcan_bittiming_const; priv->can.do_set_bittiming = grcan_set_bittiming; From 1e93ed26acf03fe6c97c6d573a10178596aadd43 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Fri, 29 Apr 2022 10:46:55 +0200 Subject: [PATCH 081/491] can: grcan: grcan_probe(): fix broken system id check for errata workaround needs The systemid property was checked for in the wrong place of the device tree and compared to the wrong value. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-3-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index f8860900575be..4ca3da56d3aa5 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -241,7 +241,7 @@ struct grcan_device_config { .rxsize = GRCAN_DEFAULT_BUFFER_SIZE, \ } -#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100 +#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100 #define GRLIB_VERSION_MASK 0xffff /* GRCAN private data structure */ @@ -1643,6 +1643,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev, static int grcan_probe(struct platform_device *ofdev) { struct device_node *np = ofdev->dev.of_node; + struct device_node *sysid_parent; u32 sysid, ambafreq; int irq, err; void __iomem *base; @@ -1651,10 +1652,15 @@ static int grcan_probe(struct platform_device *ofdev) /* Compare GRLIB version number with the first that does not * have the tx bug (see start_xmit) */ - err = of_property_read_u32(np, "systemid", &sysid); - if (!err && ((sysid & GRLIB_VERSION_MASK) - >= GRCAN_TXBUG_SAFE_GRLIB_VERSION)) - txbug = false; + sysid_parent = of_find_node_by_path("/ambapp0"); + if (sysid_parent) { + of_node_get(sysid_parent); + err = of_property_read_u32(sysid_parent, "systemid", &sysid); + if (!err && ((sysid & GRLIB_VERSION_MASK) >= + GRCAN_TXBUG_SAFE_GRLIB_VERSION)) + txbug = false; + of_node_put(sysid_parent); + } err = of_property_read_u32(np, "freq", &ambafreq); if (err) { From 2873d4d52f7c52d60b316ba6c47bd7122b5a9861 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Fri, 29 Apr 2022 10:46:56 +0200 Subject: [PATCH 082/491] can: grcan: only use the NAPI poll budget for RX The previous split budget between TX and RX made it return not using the entire budget but at the same time not having calling called napi_complete. This sometimes led to the poll to not be called, and at the same time having TX and RX interrupts disabled resulting in the driver getting stuck. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-4-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 4ca3da56d3aa5..5215bd9b2c80d 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1125,7 +1125,7 @@ static int grcan_close(struct net_device *dev) return 0; } -static int grcan_transmit_catch_up(struct net_device *dev, int budget) +static void grcan_transmit_catch_up(struct net_device *dev) { struct grcan_priv *priv = netdev_priv(dev); unsigned long flags; @@ -1133,7 +1133,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) spin_lock_irqsave(&priv->lock, flags); - work_done = catch_up_echo_skb(dev, budget, true); + work_done = catch_up_echo_skb(dev, -1, true); if (work_done) { if (!priv->resetting && !priv->closing && !(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) @@ -1147,8 +1147,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - - return work_done; } static int grcan_receive(struct net_device *dev, int budget) @@ -1230,19 +1228,13 @@ static int grcan_poll(struct napi_struct *napi, int budget) struct net_device *dev = priv->dev; struct grcan_registers __iomem *regs = priv->regs; unsigned long flags; - int tx_work_done, rx_work_done; - int rx_budget = budget / 2; - int tx_budget = budget - rx_budget; + int work_done; - /* Half of the budget for receiving messages */ - rx_work_done = grcan_receive(dev, rx_budget); + work_done = grcan_receive(dev, budget); - /* Half of the budget for transmitting messages as that can trigger echo - * frames being received - */ - tx_work_done = grcan_transmit_catch_up(dev, tx_budget); + grcan_transmit_catch_up(dev); - if (rx_work_done < rx_budget && tx_work_done < tx_budget) { + if (work_done < budget) { napi_complete(napi); /* Guarantee no interference with a running reset that otherwise @@ -1259,7 +1251,7 @@ static int grcan_poll(struct napi_struct *napi, int budget) spin_unlock_irqrestore(&priv->lock, flags); } - return rx_work_done + tx_work_done; + return work_done; } /* Work tx bug by waiting while for the risky situation to clear. If that fails, From efce2d0ba6bf70994394a5a139347ced4d172771 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Apr 2022 11:15:08 -0400 Subject: [PATCH 083/491] SUNRPC: Ensure timely close of disconnected AF_LOCAL sockets When the rpcbind server closes the socket, we need to ensure that the socket is closed by the kernel as soon as feasible, so add a sk_state_change callback to trigger this close. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f9849b297ea39..25b8a8ead56bf 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1418,6 +1418,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +/** + * xs_local_state_change - callback to handle AF_LOCAL socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_local_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct sock_xprt *transport; + + if (!(xprt = xprt_from_sock(sk))) + return; + transport = container_of(xprt, struct sock_xprt, xprt); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + clear_bit(XPRT_CONNECTED, &xprt->state); + /* Trigger the socket release */ + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); + } +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1866,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_state_change = xs_local_state_change; sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); From f0a6c68f69981214cb7858738dd2bc81475111f7 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 24 Apr 2022 12:46:23 +0100 Subject: [PATCH 084/491] MIPS: Fix CP0 counter erratum detection for R4k CPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the discrepancy between the two places we check for the CP0 counter erratum in along with the incorrect comparison of the R4400 revision number against 0x30 which matches none and consistently consider all R4000 and R4400 processors affected, as documented in processor errata publications[1][2][3], following the mapping between CP0 PRId register values and processor models: PRId | Processor Model ---------+-------------------- 00000422 | R4000 Revision 2.2 00000430 | R4000 Revision 3.0 00000440 | R4400 Revision 1.0 00000450 | R4400 Revision 2.0 00000460 | R4400 Revision 3.0 No other revision of either processor has ever been spotted. Contrary to what has been stated in commit ce202cbb9e0b ("[MIPS] Assume R4000/R4400 newer than 3.0 don't have the mfc0 count bug") marking the CP0 counter as buggy does not preclude it from being used as either a clock event or a clock source device. It just cannot be used as both at a time, because in that case clock event interrupts will be occasionally lost, and the use as a clock event device takes precedence. Compare against 0x4ff in `can_use_mips_counter' so that a single machine instruction is produced. References: [1] "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", MIPS Technologies Inc., May 10, 1994, Erratum 53, p.13 [2] "MIPS R4400PC/SC Errata, Processor Revision 1.0", MIPS Technologies Inc., February 9, 1994, Erratum 21, p.4 [3] "MIPS R4400PC/SC Errata, Processor Revision 2.0 & 3.0", MIPS Technologies Inc., January 24, 1995, Erratum 14, p.3 Signed-off-by: Maciej W. Rozycki Fixes: ce202cbb9e0b ("[MIPS] Assume R4000/R4400 newer than 3.0 don't have the mfc0 count bug") Cc: stable@vger.kernel.org # v2.6.24+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/timex.h | 8 ++++---- arch/mips/kernel/time.c | 11 +++-------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index b05bb70a2e46f..8026baf46e729 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -40,9 +40,9 @@ typedef unsigned int cycles_t; /* - * On R4000/R4400 before version 5.0 an erratum exists such that if the - * cycle counter is read in the exact moment that it is matching the - * compare register, no interrupt will be generated. + * On R4000/R4400 an erratum exists such that if the cycle counter is + * read in the exact moment that it is matching the compare register, + * no interrupt will be generated. * * There is a suggested workaround and also the erratum can't strike if * the compare interrupt isn't being used as the clock source device. @@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid) if (!__builtin_constant_p(cpu_has_counter)) asm volatile("" : "=m" (cpu_data[0].options)); if (likely(cpu_has_counter && - prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0)))) + prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15)))) return 1; else return 0; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index caa01457dce60..ed339d7979f3f 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void) case CPU_R4400MC: /* * The published errata for the R4400 up to 3.0 say the CPU - * has the mfc0 from count bug. + * has the mfc0 from count bug. This seems the last version + * produced. */ - if ((current_cpu_data.processor_id & 0xff) <= 0x30) - return 1; - - /* - * we assume newer revisions are ok - */ - return 0; + return 1; } return 0; From c6fe81191bd74f7e6ae9ce96a4837df9485f3ab8 Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Tue, 22 Mar 2022 15:28:39 +0200 Subject: [PATCH 085/491] RISC-V: relocate DTB if it's outside memory region In case the DTB provided by the bootloader/BootROM is before the kernel image or outside /memory, we won't be able to access it through the linear mapping, and get a segfault on setup_arch(). Currently OpenSBI relocates DTB but that's not always the case (e.g. if FW_JUMP_FDT_ADDR is not specified), and it's also not the most portable approach since the default FW_JUMP_FDT_ADDR of the generic platform relocates the DTB at a specific offset that may not be available. To avoid this situation copy DTB so that it's visible through the linear mapping. Signed-off-by: Nick Kossifidis Link: https://lore.kernel.org/r/20220322132839.3653682-1-mick@ics.forth.gr Tested-by: Conor Dooley Fixes: f105aa940e78 ("riscv: add BUILTIN_DTB support for MMU-enabled targets") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b0793dc0c291e..05ed641a1134c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -208,8 +208,25 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { + /* + * In case the DTB is not located in a memory region we won't + * be able to locate it later on via the linear mapping and + * get a segfault when accessing it via __va(dtb_early_pa). + * To avoid this situation copy DTB to a memory region. + * Note that memblock_phys_alloc will also reserve DTB region. + */ + if (!memblock_is_memory(dtb_early_pa)) { + size_t fdt_size = fdt_totalsize(dtb_early_va); + phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); + void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); + + memcpy(new_dtb_early_va, dtb_early_va, fdt_size); + early_memunmap(new_dtb_early_va, fdt_size); + _dtb_early_pa = new_dtb_early_pa; + } else + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + } early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); From 3d092ef09303e615707dc5755cf0e29b4df7555f Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 19 Apr 2022 12:08:09 -0500 Subject: [PATCH 086/491] ipmi: When handling send message responses, don't process the message A chunk was dropped when the code handling send messages was rewritten. Those messages shouldn't be processed normally, they are just an indication that the message was successfully sent and the timers should be started for the real response that should be coming later. Add back in the missing chunk to just discard the message and go on. Fixes: 059747c245f0 ("ipmi: Add support for IPMB direct messages") Reported-by: Joe Wiese Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Corey Minyard Tested-by: Joe Wiese --- drivers/char/ipmi/ipmi_msghandler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c59265146e9c8..5f7abf456a177 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4518,6 +4518,8 @@ static int handle_one_recv_msg(struct ipmi_smi *intf, } else /* The message was sent, start the timer. */ intf_start_seq_timer(intf, msg->msgid); + requeue = 0; + goto out; } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1)) || (msg->rsp[1] != msg->data[1])) { /* From 9cc3aac42566a0021e0ab7c4e9b31667ad75b1e3 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 21 Apr 2022 06:49:43 -0500 Subject: [PATCH 087/491] ipmi:ipmi_ipmb: Fix null-ptr-deref in ipmi_unregister_smi() KASAN report null-ptr-deref as follows: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:ipmi_unregister_smi+0x7d/0xd50 drivers/char/ipmi/ipmi_msghandler.c:3680 Call Trace: ipmi_ipmb_remove+0x138/0x1a0 drivers/char/ipmi/ipmi_ipmb.c:443 ipmi_ipmb_probe+0x409/0xda1 drivers/char/ipmi/ipmi_ipmb.c:548 i2c_device_probe+0x959/0xac0 drivers/i2c/i2c-core-base.c:563 really_probe+0x3f3/0xa70 drivers/base/dd.c:541 In ipmi_ipmb_probe(), 'iidev->intf' is not set before ipmi_register_smi() success. And in the error handling case, ipmi_ipmb_remove() is called to release resources, ipmi_unregister_smi() is called without check 'iidev->intf', this will cause KASAN null-ptr-deref issue. General kernel style is to allow NULL to be passed into unregister calls, so fix it that way. This allows a NULL check to be removed in other code. Fixes: 57c9e3c9a374 ("ipmi:ipmi_ipmb: Unregister the SMI on remove") Reported-by: Hulk Robot Cc: stable@vger.kernel.org # v5.17+ Cc: Wei Yongjun Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 5 ++++- drivers/char/ipmi/ipmi_si_intf.c | 5 +---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 5f7abf456a177..f1827257ef0e0 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3677,8 +3677,11 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) void ipmi_unregister_smi(struct ipmi_smi *intf) { struct ipmi_smi_watcher *w; - int intf_num = intf->intf_num, index; + int intf_num, index; + if (!intf) + return; + intf_num = intf->intf_num; mutex_lock(&ipmi_interfaces_mutex); intf->intf_num = -1; intf->in_shutdown = true; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 64dedb3ef8ec4..5604a810fb3d2 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2220,10 +2220,7 @@ static void cleanup_one_si(struct smi_info *smi_info) return; list_del(&smi_info->link); - - if (smi_info->intf) - ipmi_unregister_smi(smi_info->intf); - + ipmi_unregister_smi(smi_info->intf); kfree(smi_info); } From 892de36fd4a98fab3298d417c051d9099af5448d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Apr 2022 12:22:10 -0400 Subject: [PATCH 088/491] SUNRPC: Ensure gss-proxy connects on setup For reasons best known to the author, gss-proxy does not implement a NULL procedure, and returns RPC_PROC_UNAVAIL. However we still want to ensure that we connect to the service at setup time. So add a quirk-flag specially for this case. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- net/sunrpc/clnt.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 267b7aeaf1a69..db5149567305e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf25..8ca1d809b78d9 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -97,7 +97,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * timeout, which would result in reconnections being * done without the correct namespace: */ - .flags = RPC_CLNT_CREATE_NOPING | + .flags = RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 98133aa54f198..22c28cf43ebae 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -479,6 +479,9 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { int err = rpc_ping(clnt); + if ((args->flags & RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL) && + err == -EOPNOTSUPP) + err = 0; if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); From 2bfed7d2ffa5d86c462d3e2067f2832eaf8c04c7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 19 Mar 2022 02:00:11 +0100 Subject: [PATCH 089/491] selftests/seccomp: Don't call read() on TTY from background pgrp Since commit 92d25637a3a4 ("kselftest: signal all child processes"), tests are executed in background process groups. This means that trying to read from stdin now throws SIGTTIN when stdin is a TTY, which breaks some seccomp selftests that try to use read(0, NULL, 0) as a dummy syscall. The simplest way to fix that is probably to just use -1 instead of 0 as the dummy read()'s FD. Fixes: 92d25637a3a4 ("kselftest: signal all child processes") Signed-off-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220319010011.1374622-1-jannh@google.com --- tools/testing/selftests/seccomp/seccomp_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 9d126d7fabdb7..313bb0cbfb1eb 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -955,7 +955,7 @@ TEST(ERRNO_valid) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(E2BIG, errno); } @@ -974,7 +974,7 @@ TEST(ERRNO_zero) EXPECT_EQ(parent, syscall(__NR_getppid)); /* "errno" of 0 is ok. */ - EXPECT_EQ(0, read(0, NULL, 0)); + EXPECT_EQ(0, read(-1, NULL, 0)); } /* @@ -995,7 +995,7 @@ TEST(ERRNO_capped) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(4095, errno); } @@ -1026,7 +1026,7 @@ TEST(ERRNO_order) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(12, errno); } @@ -2623,7 +2623,7 @@ void *tsync_sibling(void *data) ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); if (!ret) return (void *)SIBLING_EXIT_NEWPRIVS; - read(0, NULL, 0); + read(-1, NULL, 0); return (void *)SIBLING_EXIT_UNKILLED; } From a3d0562d4dc039bca39445e1cddde7951662e17d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Apr 2022 12:27:30 -0400 Subject: [PATCH 090/491] Revert "SUNRPC: attempt AF_LOCAL connect on setup" This reverts commit 7073ea8799a8cf73db60270986f14e4aae20fa80. We must not try to connect the socket while the transport is under construction, because the mechanisms to safely tear it down are not in place. As the code stands, we end up leaking the sockets on a connection error. Reported-by: wanghai (M) Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 25b8a8ead56bf..650102a9c86a9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2875,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); From 38dcd9570d6fced1dcfee226d5b29722b070ce90 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 28 Apr 2022 12:45:10 +0800 Subject: [PATCH 091/491] selftests/net: add missing tests to Makefile When generating the selftests to another folder, the fixed tests are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3fe2515aa616e..0f2ebc38d8934 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,7 +30,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_time.sh +TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -54,6 +54,7 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender +TEST_PROGS += test_vxlan_vnifiltering.sh TEST_FILES := settings From f62c5acc800eebfe25bf6738b2cc8aa5d6aa7598 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 28 Apr 2022 12:45:11 +0800 Subject: [PATCH 092/491] selftests/net/forwarding: add missing tests to Makefile When generating the selftests to another folder, the fixed tests are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net/forwarding" INSTALL_PATH=/tmp/kselftests Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/Makefile | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8fa97ae9af9ee..c87e674b61b1d 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -2,15 +2,31 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ bridge_vlan_unaware.sh \ + custom_multipath_hash.sh \ + dual_vxlan_bridge.sh \ + ethtool_extended_state.sh \ ethtool.sh \ + gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ + gre_multipath_nh_res.sh \ + gre_multipath_nh.sh \ gre_multipath.sh \ + hw_stats_l3.sh \ ip6_forward_instats_vrf.sh \ + ip6gre_custom_multipath_hash.sh \ + ip6gre_flat_key.sh \ + ip6gre_flat_keys.sh \ + ip6gre_flat.sh \ + ip6gre_hier_key.sh \ + ip6gre_hier_keys.sh \ + ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ @@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + pedit_dsfield.sh \ + pedit_ip.sh \ + pedit_l4port.sh \ + q_in_vni_ipv6.sh \ + q_in_vni.sh \ router_bridge.sh \ router_bridge_vlan.sh \ router_broadcast.sh \ + router_mpath_nh_res.sh \ router_mpath_nh.sh \ router_multicast.sh \ router_multipath.sh \ + router_nh.sh \ router.sh \ router_vid_1.sh \ sch_ets.sh \ + sch_red.sh \ sch_tbf_ets.sh \ sch_tbf_prio.sh \ sch_tbf_root.sh \ + skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ tc_mpls_l2vpn.sh \ + tc_police.sh \ tc_shblocks.sh \ tc_vlan_modify.sh \ + vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_bridge_1d_ipv6.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ + vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh TEST_PROGS_EXTENDED := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ + ip6gre_lib.sh \ ipip_lib.sh \ lib.sh \ mirror_gre_lib.sh \ From ff5265d45345d01fefc98fcb9ae891b59633c919 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 14:25:43 +0800 Subject: [PATCH 093/491] net: ethernet: mediatek: add missing of_node_put() in mtk_sgmii_init() The node pointer returned by of_parse_phandle() with refcount incremented, so add of_node_put() after using it in mtk_sgmii_init(). Fixes: 9ffee4a8276c ("net: ethernet: mediatek: Extend SGMII related functions") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428062543.64883-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_sgmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index 32d83421226a2..5897940a418b6 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3) break; ss->regmap[i] = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(ss->regmap[i])) return PTR_ERR(ss->regmap[i]); } From e87f66b38e66dffdec9daa9f8f0eb044e9a62e3b Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Thu, 28 Apr 2022 23:19:32 +0200 Subject: [PATCH 094/491] net: mdio: Fix ENOMEM return value in BCM6368 mux bus controller Error values inside the probe function must be < 0. The ENOMEM return value has the wrong sign: it is positive instead of negative. Add a minus sign. Fixes: e239756717b5 ("net: mdio: Add BCM6368 MDIO mux bus controller") Signed-off-by: Niels Dossche Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220428211931.8130-1-dossche.niels@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mux-bcm6368.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c index 6dcbf987d61b5..8b444a8eb6b55 100644 --- a/drivers/net/mdio/mdio-mux-bcm6368.c +++ b/drivers/net/mdio/mdio-mux-bcm6368.c @@ -115,7 +115,7 @@ static int bcm6368_mdiomux_probe(struct platform_device *pdev) md->mii_bus = devm_mdiobus_alloc(&pdev->dev); if (!md->mii_bus) { dev_err(&pdev->dev, "mdiomux bus alloc failed\n"); - return ENOMEM; + return -ENOMEM; } bus = md->mii_bus; From 52b2abef450a78e25d485ac61e32f4ce86a87701 Mon Sep 17 00:00:00 2001 From: Qiao Ma Date: Thu, 28 Apr 2022 20:30:16 +0800 Subject: [PATCH 095/491] hinic: fix bug of wq out of bound access If wq has only one page, we need to check wqe rolling over page by compare end_idx and curr_idx, and then copy wqe to shadow wqe to avoid out of bound access. This work has been done in hinic_get_wqe, but missed for hinic_read_wqe. This patch fixes it, and removes unnecessary MASKED_WQE_IDX(). Fixes: 7dd29ee12865 ("hinic: add sriov feature support") Signed-off-by: Qiao Ma Reviewed-by: Xunlei Pang Link: https://lore.kernel.org/r/282817b0e1ae2e28fdf3ed8271a04e77f57bf42e.1651148587.git.mqaio@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c index 2d9b06d7caadb..f7dc7d825f637 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c @@ -771,7 +771,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, /* If we only have one page, still need to get shadown wqe when * wqe rolling-over page */ - if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) { + if (curr_pg != end_pg || end_prod_idx < *prod_idx) { void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size]; copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx); @@ -841,7 +841,10 @@ struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size, *cons_idx = curr_cons_idx; - if (curr_pg != end_pg) { + /* If we only have one page, still need to get shadown wqe when + * wqe rolling-over page + */ + if (curr_pg != end_pg || end_cons_idx < curr_cons_idx) { void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size]; copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx); From fee34dd199384a483f84806a5cbcf8d657a481cc Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Thu, 28 Apr 2022 12:37:09 +0530 Subject: [PATCH 096/491] net: dsa: ksz9477: port mirror sniffing limited to one port This patch limits the sniffing to only one port during the mirror add. And during the mirror_del it checks for all the ports using the sniff, if and only if no other ports are referring, sniffing is disabled. The code is updated based on the review comments of LAN937x port mirror patch. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210422094257.1641396-8-prasanna.vengateshan@microchip.com/ Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Prasanna Vengateshan Signed-off-by: Arun Ramadoss Link: https://lore.kernel.org/r/20220428070709.7094-1-arun.ramadoss@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 38 ++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 8222c8a6c5ec5..7310d19d1f064 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1021,14 +1021,32 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, bool ingress, struct netlink_ext_ack *extack) { struct ksz_device *dev = ds->priv; + u8 data; + int p; + + /* Limit to one sniffer port + * Check if any of the port is already set for sniffing + * If yes, instruct the user to remove the previous entry & exit + */ + for (p = 0; p < dev->port_cnt; p++) { + /* Skip the current sniffing port */ + if (p == mirror->to_local_port) + continue; + + ksz_pread8(dev, p, P_MIRROR_CTRL, &data); + + if (data & PORT_MIRROR_SNIFFER) { + NL_SET_ERR_MSG_MOD(extack, + "Sniffer port is already configured, delete existing rules & retry"); + return -EBUSY; + } + } if (ingress) ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true); else ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true); - ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false); - /* configure mirror port */ ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, true); @@ -1042,16 +1060,28 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror) { struct ksz_device *dev = ds->priv; + bool in_use = false; u8 data; + int p; if (mirror->ingress) ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false); else ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false); - ksz_pread8(dev, port, P_MIRROR_CTRL, &data); - if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) + /* Check if any of the port is still referring to sniffer port */ + for (p = 0; p < dev->port_cnt; p++) { + ksz_pread8(dev, p, P_MIRROR_CTRL, &data); + + if ((data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) { + in_use = true; + break; + } + } + + /* delete sniffing if there are no other mirroring rules */ + if (!in_use) ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false); } From a9e9b091a1c14ecd8bd9d3214a62142a1786fe30 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 17:53:17 +0800 Subject: [PATCH 097/491] net: dsa: mt7530: add missing of_node_put() in mt7530_setup() Add of_node_put() if of_get_phy_mode() fails in mt7530_setup() Fixes: 0c65b2b90d13 ("net: of_get_phy_mode: Change API to solve int/unit warnings") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428095317.538829-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 19f0035d4410f..fe3cb26f4287e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2229,6 +2229,7 @@ mt7530_setup(struct dsa_switch *ds) ret = of_get_phy_mode(mac_np, &interface); if (ret && ret != -ENODEV) { of_node_put(mac_np); + of_node_put(phy_node); return ret; } id = of_mdio_parse_addr(ds->dev, phy_node); From 1a15267b7be77e0792cf0c7b36ca65c8eb2df0d8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 17:57:16 +0800 Subject: [PATCH 098/491] net: stmmac: dwmac-sun8i: add missing of_node_put() in sun8i_dwmac_register_mdio_mux() The node pointer returned by of_get_child_by_name() with refcount incremented, so add of_node_put() after using it. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428095716.540452-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index f86cc83003f2d..f834472599f75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -907,6 +907,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn, &gmac->mux_handle, priv, priv->mii); + of_node_put(mdio_mux); return ret; } From 95098d5ac2551769807031444e55a0da5d4f0952 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 29 Apr 2022 09:53:37 +0800 Subject: [PATCH 099/491] net: cpsw: add missing of_node_put() in cpsw_probe_dt() 'tmp_node' need be put before returning from cpsw_probe_dt(), so add missing of_node_put() in error path. Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_new.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index bd4b1528cf992..79e850fe4621c 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1246,8 +1246,10 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) data->slave_data = devm_kcalloc(dev, CPSW_SLAVE_PORTS_NUM, sizeof(struct cpsw_slave_data), GFP_KERNEL); - if (!data->slave_data) + if (!data->slave_data) { + of_node_put(tmp_node); return -ENOMEM; + } /* Populate all the child nodes here... */ @@ -1341,6 +1343,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) err_node_put: of_node_put(port_np); + of_node_put(tmp_node); return ret; } From 39cb9faa5d46d0d0694f4b594ef905f517600c8e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Apr 2022 21:05:16 +0100 Subject: [PATCH 100/491] rxrpc: Enable IPv6 checksums on transport socket AF_RXRPC doesn't currently enable IPv6 UDP Tx checksums on the transport socket it opens and the checksums in the packets it generates end up 0. It probably should also enable IPv6 UDP Rx checksums and IPv4 UDP checksums. The latter only seem to be applied if the socket family is AF_INET and don't seem to apply if it's AF_INET6. IPv4 packets from an IPv6 socket seem to have checksums anyway. What seems to have happened is that the inet_inv_convert_csum() call didn't get converted to the appropriate udp_port_cfg parameters - and udp_sock_create() disables checksums unless explicitly told not too. Fix this by enabling the three udp_port_cfg checksum options. Fixes: 1a9b86c9fd95 ("rxrpc: use udp tunnel APIs instead of open code in rxrpc_open_socket") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Xin Long Reviewed-by: Marc Dionne cc: Vadim Fedorenko cc: David S. Miller cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller --- net/rxrpc/local_object.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a4111408ffd0c..6a1611b0e3037 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) local, srx->transport_type, srx->transport.family); udp_conf.family = srx->transport.family; + udp_conf.use_udp_checksums = true; if (udp_conf.family == AF_INET) { udp_conf.local_ip = srx->transport.sin.sin_addr; udp_conf.local_udp_port = srx->transport.sin.sin_port; @@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } else { udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; udp_conf.local_udp_port = srx->transport.sin6.sin6_port; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; #endif } ret = udp_sock_create(net, &udp_conf, &local->socket); From dba5bdd57bea587ea4f0b79b03c71135f84a7e8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 08:42:57 -0700 Subject: [PATCH 101/491] net: igmp: respect RCU rules in ip_mc_source() and ip_mc_msfilter() syzbot reported an UAF in ip_mc_sf_allow() [1] Whenever RCU protected list replaces an object, the pointer to the new object needs to be updated _before_ the call to kfree_rcu() or call_rcu() Because kfree_rcu(ptr, rcu) got support for NULL ptr only recently in commit 12edff045bc6 ("rcu: Make kfree_rcu() ignore NULL pointers"), I chose to use the conditional to make sure stable backports won't miss this detail. if (psl) kfree_rcu(psl, rcu); net/ipv6/mcast.c has similar issues, addressed in a separate patch. [1] BUG: KASAN: use-after-free in ip_mc_sf_allow+0x6bb/0x6d0 net/ipv4/igmp.c:2655 Read of size 4 at addr ffff88807d37b904 by task syz-executor.5/908 CPU: 0 PID: 908 Comm: syz-executor.5 Not tainted 5.18.0-rc4-syzkaller-00064-g8f4dd16603ce #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 ip_mc_sf_allow+0x6bb/0x6d0 net/ipv4/igmp.c:2655 raw_v4_input net/ipv4/raw.c:190 [inline] raw_local_deliver+0x4d1/0xbe0 net/ipv4/raw.c:218 ip_protocol_deliver_rcu+0xcf/0xb30 net/ipv4/ip_input.c:193 ip_local_deliver_finish+0x2ee/0x4c0 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x1b3/0x200 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish+0x1cb/0x2f0 net/ipv4/ip_input.c:437 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0xaa/0xd0 net/ipv4/ip_input.c:556 __netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5405 __netif_receive_skb+0x24/0x1b0 net/core/dev.c:5519 netif_receive_skb_internal net/core/dev.c:5605 [inline] netif_receive_skb+0x13e/0x8e0 net/core/dev.c:5664 tun_rx_batched.isra.0+0x460/0x720 drivers/net/tun.c:1534 tun_get_user+0x28b7/0x3e30 drivers/net/tun.c:1985 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2015 call_write_iter include/linux/fs.h:2050 [inline] new_sync_write+0x38a/0x560 fs/read_write.c:504 vfs_write+0x7c0/0xac0 fs/read_write.c:591 ksys_write+0x127/0x250 fs/read_write.c:644 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f3f12c3bbff Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 99 fd ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 cc fd ff ff 48 RSP: 002b:00007f3f13ea9130 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f3f12d9bf60 RCX: 00007f3f12c3bbff RDX: 0000000000000036 RSI: 0000000020002ac0 RDI: 00000000000000c8 RBP: 00007f3f12ce308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000036 R11: 0000000000000293 R12: 0000000000000000 R13: 00007fffb68dd79f R14: 00007f3f13ea9300 R15: 0000000000022000 Allocated by task 908: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:436 [inline] ____kasan_kmalloc mm/kasan/common.c:515 [inline] ____kasan_kmalloc mm/kasan/common.c:474 [inline] __kasan_kmalloc+0xa6/0xd0 mm/kasan/common.c:524 kasan_kmalloc include/linux/kasan.h:234 [inline] __do_kmalloc mm/slab.c:3710 [inline] __kmalloc+0x209/0x4d0 mm/slab.c:3719 kmalloc include/linux/slab.h:586 [inline] sock_kmalloc net/core/sock.c:2501 [inline] sock_kmalloc+0xb5/0x100 net/core/sock.c:2492 ip_mc_source+0xba2/0x1100 net/ipv4/igmp.c:2392 do_ip_setsockopt net/ipv4/ip_sockglue.c:1296 [inline] ip_setsockopt+0x2312/0x3ab0 net/ipv4/ip_sockglue.c:1432 raw_setsockopt+0x274/0x2c0 net/ipv4/raw.c:861 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 753: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free+0x13d/0x180 mm/kasan/common.c:328 kasan_slab_free include/linux/kasan.h:200 [inline] __cache_free mm/slab.c:3439 [inline] kmem_cache_free_bulk+0x69/0x460 mm/slab.c:3774 kfree_bulk include/linux/slab.h:437 [inline] kfree_rcu_work+0x51c/0xa10 kernel/rcu/tree.c:3318 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0x7e/0x90 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x990 kernel/rcu/tree.c:3595 ip_mc_msfilter+0x712/0xb60 net/ipv4/igmp.c:2510 do_ip_setsockopt net/ipv4/ip_sockglue.c:1257 [inline] ip_setsockopt+0x32e1/0x3ab0 net/ipv4/ip_sockglue.c:1432 raw_setsockopt+0x274/0x2c0 net/ipv4/raw.c:861 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0x7e/0x90 mm/kasan/generic.c:348 call_rcu+0x99/0x790 kernel/rcu/tree.c:3074 mpls_dev_notify+0x552/0x8a0 net/mpls/af_mpls.c:1656 notifier_call_chain+0xb5/0x200 kernel/notifier.c:84 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1938 call_netdevice_notifiers_extack net/core/dev.c:1976 [inline] call_netdevice_notifiers net/core/dev.c:1990 [inline] unregister_netdevice_many+0x92e/0x1890 net/core/dev.c:10751 default_device_exit_batch+0x449/0x590 net/core/dev.c:11245 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 The buggy address belongs to the object at ffff88807d37b900 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 4 bytes inside of 64-byte region [ffff88807d37b900, ffff88807d37b940) The buggy address belongs to the physical page: page:ffffea0001f4dec0 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88807d37b180 pfn:0x7d37b flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffff888010c41340 ffffea0001c795c8 ffff888010c40200 raw: ffff88807d37b180 ffff88807d37b000 000000010000001f 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x342040(__GFP_IO|__GFP_NOWARN|__GFP_COMP|__GFP_HARDWALL|__GFP_THISNODE), pid 2963, tgid 2963 (udevd), ts 139732238007, free_ts 139730893262 prep_new_page mm/page_alloc.c:2441 [inline] get_page_from_freelist+0xba2/0x3e00 mm/page_alloc.c:4182 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5408 __alloc_pages_node include/linux/gfp.h:587 [inline] kmem_getpages mm/slab.c:1378 [inline] cache_grow_begin+0x75/0x350 mm/slab.c:2584 cache_alloc_refill+0x27f/0x380 mm/slab.c:2957 ____cache_alloc mm/slab.c:3040 [inline] ____cache_alloc mm/slab.c:3023 [inline] __do_cache_alloc mm/slab.c:3267 [inline] slab_alloc mm/slab.c:3309 [inline] __do_kmalloc mm/slab.c:3708 [inline] __kmalloc+0x3b3/0x4d0 mm/slab.c:3719 kmalloc include/linux/slab.h:586 [inline] kzalloc include/linux/slab.h:714 [inline] tomoyo_encode2.part.0+0xe9/0x3a0 security/tomoyo/realpath.c:45 tomoyo_encode2 security/tomoyo/realpath.c:31 [inline] tomoyo_encode+0x28/0x50 security/tomoyo/realpath.c:80 tomoyo_realpath_from_path+0x186/0x620 security/tomoyo/realpath.c:288 tomoyo_get_realpath security/tomoyo/file.c:151 [inline] tomoyo_path_perm+0x21b/0x400 security/tomoyo/file.c:822 security_inode_getattr+0xcf/0x140 security/security.c:1350 vfs_getattr fs/stat.c:157 [inline] vfs_statx+0x16a/0x390 fs/stat.c:232 vfs_fstatat+0x8c/0xb0 fs/stat.c:255 __do_sys_newfstatat+0x91/0x110 fs/stat.c:425 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1356 [inline] free_pcp_prepare+0x549/0xd20 mm/page_alloc.c:1406 free_unref_page_prepare mm/page_alloc.c:3328 [inline] free_unref_page+0x19/0x6a0 mm/page_alloc.c:3423 __vunmap+0x85d/0xd30 mm/vmalloc.c:2667 __vfree+0x3c/0xd0 mm/vmalloc.c:2715 vfree+0x5a/0x90 mm/vmalloc.c:2746 __do_replace+0x16b/0x890 net/ipv6/netfilter/ip6_tables.c:1117 do_replace net/ipv6/netfilter/ip6_tables.c:1157 [inline] do_ip6t_set_ctl+0x90d/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 ipv6_setsockopt+0x122/0x180 net/ipv6/ipv6_sockglue.c:1026 tcp_setsockopt+0x136/0x2520 net/ipv4/tcp.c:3696 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Memory state around the buggy address: ffff88807d37b800: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc ffff88807d37b880: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc >ffff88807d37b900: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff88807d37b980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88807d37ba00: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc Fixes: c85bb41e9318 ("igmp: fix ip_mc_sf_allow race [v5]") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2ad3c7b42d6d2..1d9e6d5e9a76c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); - } else + } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); + } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: From a9384a4c1d250cb40cebf50e41459426d160b08e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 09:20:36 -0700 Subject: [PATCH 102/491] mld: respect RCU rules in ip6_mc_source() and ip6_mc_msfilter() Whenever RCU protected list replaces an object, the pointer to the new object needs to be updated _before_ the call to kfree_rcu() or call_rcu() Also ip6_mc_msfilter() needs to update the pointer before releasing the mc_lock mutex. Note that linux-5.13 was supporting kfree_rcu(NULL, rcu), so this fix does not need the conditional test I was forced to use in the equivalent patch for IPv4. Fixes: 882ba1f73c06 ("mld: convert ipv6_mc_socklist->sflist to RCU") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 909f937befd71..7f695c39d9a8c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, newpsl->sl_addr[i] = psl->sl_addr[i]; atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } + rcu_assign_pointer(pmc->sflist, newpsl); + kfree_rcu(psl, rcu); psl = newpsl; - rcu_assign_pointer(pmc->sflist, psl); } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { @@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psl->sl_count, psl->sl_addr, 0); atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } - mutex_unlock(&idev->mc_lock); rcu_assign_pointer(pmc->sflist, newpsl); + mutex_unlock(&idev->mc_lock); + kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: From 2667ed10d9f01e250ba806276740782c89d77fda Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 28 Apr 2022 11:00:41 -0700 Subject: [PATCH 103/491] mm: Fix PASID use-after-free issue The PASID is being freed too early. It needs to stay around until after device drivers that might be using it have had a chance to clear it out of the hardware. The relevant refcounts are: mmget() /mmput() refcount the mm's address space mmgrab()/mmdrop() refcount the mm itself The PASID is currently tied to the life of the mm's address space and freed in __mmput(). This makes logical sense because the PASID can't be used once the address space is gone. But, this misses an important point: even after the address space is gone, the PASID will still be programmed into a device. Device drivers might, for instance, still need to flush operations that are outstanding and need to use that PASID. They do this at file->release() time. Device drivers call the IOMMU driver to hold a reference on the mm itself and drop it at file->release() time. But, the IOMMU driver holds a reference on the mm itself, not the address space. The address space (and the PASID) is long gone by the time the driver tries to clean up. This is effectively a use-after-free bug on the PASID. To fix this, move the PASID free operation from __mmput() to __mmdrop(). This ensures that the IOMMU driver's existing mmgrab() keeps the PASID allocated until it drops its mm reference. Fixes: 701fac40384f ("iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit") Reported-by: Zhangfei Gao Suggested-by: Jean-Philippe Brucker Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Tested-by: Zhangfei Gao Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220428180041.806809-1-fenghua.yu@intel.com --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 9796897560ab1..35a3beff140b6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -792,6 +792,7 @@ void __mmdrop(struct mm_struct *mm) mmu_notifier_subscriptions_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); + mm_pasid_drop(mm); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1190,7 +1191,6 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); - mm_pasid_drop(mm); mmdrop(mm); } From 47f753c1108e287edb3e27fad8a7511a9d55578e Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Fri, 29 Apr 2022 19:58:07 +0800 Subject: [PATCH 104/491] net: stmmac: disable Split Header (SPH) for Intel platforms Based on DesignWare Ethernet QoS datasheet, we are seeing the limitation of Split Header (SPH) feature is not supported for Ipv4 fragmented packet. This SPH limitation will cause ping failure when the packets size exceed the MTU size. For example, the issue happens once the basic ping packet size is larger than the configured MTU size and the data is lost inside the fragmented packet, replaced by zeros/corrupted values, and leads to ping fail. So, disable the Split Header for Intel platforms. v2: Add fixes tag in commit message. Fixes: 67afd6d1cfdf("net: stmmac: Add Split Header support and enable it in XGMAC cores") Cc: # 5.10.x Suggested-by: Ong, Boon Leong Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: Wong Vee Khee Signed-off-by: Tan Tee Min Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- include/linux/stmmac.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 63754a9c4ba72..0b0be0898ac57 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -454,6 +454,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_gmac4 = 1; plat->force_sf_dma_mode = 0; plat->tso_en = 1; + plat->sph_disable = 1; /* Multiplying factor to the clk_eee_i clock time * period to make it closer to 100 ns. This value diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4a4b3651ab3e2..2525a80353b70 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7021,7 +7021,7 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "TSO feature enabled\n"); } - if (priv->dma_cap.sphen) { + if (priv->dma_cap.sphen && !priv->plat->sph_disable) { ndev->hw_features |= NETIF_F_GRO; priv->sph_cap = true; priv->sph = priv->sph_cap; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 24eea1b05ca27..29917850f0794 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -270,5 +270,6 @@ struct plat_stmmacenet_data { int msi_rx_base_vec; int msi_tx_base_vec; bool use_phy_wol; + bool sph_disable; }; #endif From da5c0f119203ad9728920456a0f52a6d850c01cd Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 29 Apr 2022 20:45:50 +0800 Subject: [PATCH 105/491] nfc: replace improper check device_is_registered() in netlink related functions The device_is_registered() in nfc core is used to check whether nfc device is registered in netlink related functions such as nfc_fw_download(), nfc_dev_up() and so on. Although device_is_registered() is protected by device_lock, there is still a race condition between device_del() and device_is_registered(). The root cause is that kobject_del() in device_del() is not protected by device_lock. (cleanup task) | (netlink task) | nfc_unregister_device | nfc_fw_download device_del | device_lock ... | if (!device_is_registered)//(1) kobject_del//(2) | ... ... | device_unlock The device_is_registered() returns the value of state_in_sysfs and the state_in_sysfs is set to zero in kobject_del(). If we pass check in position (1), then set zero in position (2). As a result, the check in position (1) is useless. This patch uses bool variable instead of device_is_registered() to judge whether the nfc device is registered, which is well synchronized. Fixes: 3e256b8f8dfa ("NFC: add nfc subsystem core") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- net/nfc/core.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index dc7a2404efdf9..5b286e1e0a6ff 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; @@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + dev->shutting_down = false; device_unlock(&dev->dev); rc = nfc_genl_device_added(dev); @@ -1166,12 +1167,10 @@ void nfc_unregister_device(struct nfc_dev *dev) rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); } + dev->shutting_down = true; device_unlock(&dev->dev); if (dev->ops->check_presence) { - device_lock(&dev->dev); - dev->shutting_down = true; - device_unlock(&dev->dev); del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } From d270453a0d9ec10bb8a802a142fb1b3601a83098 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 29 Apr 2022 20:45:51 +0800 Subject: [PATCH 106/491] nfc: nfcmrvl: main: reorder destructive operations in nfcmrvl_nci_unregister_dev to avoid bugs There are destructive operations such as nfcmrvl_fw_dnld_abort and gpio_free in nfcmrvl_nci_unregister_dev. The resources such as firmware, gpio and so on could be destructed while the upper layer functions such as nfcmrvl_fw_dnld_start and nfcmrvl_nci_recv_frame is executing, which leads to double-free, use-after-free and null-ptr-deref bugs. There are three situations that could lead to double-free bugs. The first situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_fw_dnld_start | ... | nfcmrvl_nci_unregister_dev release_firmware() | nfcmrvl_fw_dnld_abort kfree(fw) //(1) | fw_dnld_over | release_firmware ... | kfree(fw) //(2) | ... The second situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_fw_dnld_start | ... | mod_timer | (wait a time) | fw_dnld_timeout | nfcmrvl_nci_unregister_dev fw_dnld_over | nfcmrvl_fw_dnld_abort release_firmware | fw_dnld_over kfree(fw) //(1) | release_firmware ... | kfree(fw) //(2) The third situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_nci_recv_frame | if(..->fw_download_in_progress)| nfcmrvl_fw_dnld_recv_frame | queue_work | | fw_dnld_rx_work | nfcmrvl_nci_unregister_dev fw_dnld_over | nfcmrvl_fw_dnld_abort release_firmware | fw_dnld_over kfree(fw) //(1) | release_firmware | kfree(fw) //(2) The firmware struct is deallocated in position (1) and deallocated in position (2) again. The crash trace triggered by POC is like below: BUG: KASAN: double-free or invalid-free in fw_dnld_over Call Trace: kfree fw_dnld_over nfcmrvl_nci_unregister_dev nci_uart_tty_close tty_ldisc_kill tty_ldisc_hangup __tty_hangup.part.0 tty_release ... What's more, there are also use-after-free and null-ptr-deref bugs in nfcmrvl_fw_dnld_start. If we deallocate firmware struct, gpio or set null to the members of priv->fw_dnld in nfcmrvl_nci_unregister_dev, then, we dereference firmware, gpio or the members of priv->fw_dnld in nfcmrvl_fw_dnld_start, the UAF or NPD bugs will happen. This patch reorders destructive operations after nci_unregister_device in order to synchronize between cleanup routine and firmware download routine. The nci_unregister_device is well synchronized. If the device is detaching, the firmware download routine will goto error. If firmware download routine is executing, nci_unregister_device will wait until firmware download routine is finished. Fixes: 3194c6870158 ("NFC: nfcmrvl: add firmware download support") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 2fcf545012b16..1a5284de4341b 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -183,6 +183,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) { struct nci_dev *ndev = priv->ndev; + nci_unregister_device(ndev); if (priv->ndev->nfc_dev->fw_download_in_progress) nfcmrvl_fw_dnld_abort(priv); @@ -191,7 +192,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) if (gpio_is_valid(priv->config.reset_n_io)) gpio_free(priv->config.reset_n_io); - nci_unregister_device(ndev); nci_free_device(ndev); kfree(priv); } From 6b292a04c694573a302686323fe15b1c7e673e5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Apr 2022 15:54:24 +0200 Subject: [PATCH 107/491] pci_irq_vector() can't be used in atomic context any longer. This conflicts with the usage of this function in nic_mbx_intr_handler(). Cache the Linux interrupt numbers in struct nicpf and use that cache in the interrupt handler to select the mailbox. Fixes: 495c66aca3da ("genirq/msi: Convert to new functions") Reported-by: Ondrej Mosnacek Signed-off-by: Thomas Gleixner Cc: Sunil Goutham Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=2041772 Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index f2f1ce81fd9cc..0ec65ec634df5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -59,7 +59,7 @@ struct nicpf { /* MSI-X */ u8 num_vec; - bool irq_allocated[NIC_PF_MSIX_VECTORS]; + unsigned int irq_allocated[NIC_PF_MSIX_VECTORS]; char irq_name[NIC_PF_MSIX_VECTORS][20]; }; @@ -1150,7 +1150,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) u64 intr; u8 vf; - if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0)) + if (irq == nic->irq_allocated[NIC_PF_INTR_ID_MBOX0]) mbx = 0; else mbx = 1; @@ -1176,14 +1176,14 @@ static void nic_free_all_interrupts(struct nicpf *nic) for (irq = 0; irq < nic->num_vec; irq++) { if (nic->irq_allocated[irq]) - free_irq(pci_irq_vector(nic->pdev, irq), nic); - nic->irq_allocated[irq] = false; + free_irq(nic->irq_allocated[irq], nic); + nic->irq_allocated[irq] = 0; } } static int nic_register_interrupts(struct nicpf *nic) { - int i, ret; + int i, ret, irq; nic->num_vec = pci_msix_vec_count(nic->pdev); /* Enable MSI-X */ @@ -1201,13 +1201,13 @@ static int nic_register_interrupts(struct nicpf *nic) sprintf(nic->irq_name[i], "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - ret = request_irq(pci_irq_vector(nic->pdev, i), - nic_mbx_intr_handler, 0, + irq = pci_irq_vector(nic->pdev, i); + ret = request_irq(irq, nic_mbx_intr_handler, 0, nic->irq_name[i], nic); if (ret) goto fail; - nic->irq_allocated[i] = true; + nic->irq_allocated[i] = irq; } /* Enable mailbox interrupt */ From 79396934e289dbc501316c1d1f975bb4c88ae460 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 29 Apr 2022 09:43:03 -0700 Subject: [PATCH 108/491] net: dsa: b53: convert to phylink_pcs Convert B53 to use phylink_pcs for the serdes rather than hooking it into the MAC-layer callbacks. Fixes: 81c1681cbb9f ("net: dsa: b53: mark as non-legacy") Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 36 +++------------- drivers/net/dsa/b53/b53_priv.h | 24 ++++++----- drivers/net/dsa/b53/b53_serdes.c | 74 ++++++++++++++++++++++---------- drivers/net/dsa/b53/b53_serdes.h | 9 ++-- drivers/net/dsa/b53/b53_srab.c | 4 +- 5 files changed, 75 insertions(+), 72 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 77501f9c59159..fbb32aa49b241 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1354,46 +1354,25 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port, config->legacy_pre_march2020 = false; } -int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) +static struct phylink_pcs *b53_phylink_mac_select_pcs(struct dsa_switch *ds, + int port, + phy_interface_t interface) { struct b53_device *dev = ds->priv; - int ret = -EOPNOTSUPP; - if ((phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) && - dev->ops->serdes_link_state) - ret = dev->ops->serdes_link_state(dev, port, state); + if (!dev->ops->phylink_mac_select_pcs) + return NULL; - return ret; + return dev->ops->phylink_mac_select_pcs(dev, port, interface); } -EXPORT_SYMBOL(b53_phylink_mac_link_state); void b53_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { - struct b53_device *dev = ds->priv; - - if (mode == MLO_AN_PHY || mode == MLO_AN_FIXED) - return; - - if ((phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) && - dev->ops->serdes_config) - dev->ops->serdes_config(dev, port, mode, state); } EXPORT_SYMBOL(b53_phylink_mac_config); -void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port) -{ - struct b53_device *dev = ds->priv; - - if (dev->ops->serdes_an_restart) - dev->ops->serdes_an_restart(dev, port); -} -EXPORT_SYMBOL(b53_phylink_mac_an_restart); - void b53_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) @@ -2269,9 +2248,8 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_write = b53_phy_write16, .adjust_link = b53_adjust_link, .phylink_get_caps = b53_phylink_get_caps, - .phylink_mac_link_state = b53_phylink_mac_link_state, + .phylink_mac_select_pcs = b53_phylink_mac_select_pcs, .phylink_mac_config = b53_phylink_mac_config, - .phylink_mac_an_restart = b53_phylink_mac_an_restart, .phylink_mac_link_down = b53_phylink_mac_link_down, .phylink_mac_link_up = b53_phylink_mac_link_up, .port_enable = b53_enable_port, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 3085b6cc7d407..795cbffd5c2be 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -29,7 +29,6 @@ struct b53_device; struct net_device; -struct phylink_link_state; struct b53_io_ops { int (*read8)(struct b53_device *dev, u8 page, u8 reg, u8 *value); @@ -48,13 +47,10 @@ struct b53_io_ops { void (*irq_disable)(struct b53_device *dev, int port); void (*phylink_get_caps)(struct b53_device *dev, int port, struct phylink_config *config); + struct phylink_pcs *(*phylink_mac_select_pcs)(struct b53_device *dev, + int port, + phy_interface_t interface); u8 (*serdes_map_lane)(struct b53_device *dev, int port); - int (*serdes_link_state)(struct b53_device *dev, int port, - struct phylink_link_state *state); - void (*serdes_config)(struct b53_device *dev, int port, - unsigned int mode, - const struct phylink_link_state *state); - void (*serdes_an_restart)(struct b53_device *dev, int port); void (*serdes_link_set)(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); @@ -85,8 +81,15 @@ enum { BCM7278_DEVICE_ID = 0x7278, }; +struct b53_pcs { + struct phylink_pcs pcs; + struct b53_device *dev; + u8 lane; +}; + #define B53_N_PORTS 9 #define B53_N_PORTS_25 6 +#define B53_N_PCS 2 struct b53_port { u16 vlan_ctl_mask; @@ -143,6 +146,8 @@ struct b53_device { bool vlan_enabled; unsigned int num_ports; struct b53_port *ports; + + struct b53_pcs pcs[B53_N_PCS]; }; #define b53_for_each_port(dev, i) \ @@ -336,12 +341,9 @@ int b53_br_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); -int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state); void b53_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); -void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port); void b53_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c index 555e5b3723215..0690210770ffe 100644 --- a/drivers/net/dsa/b53/b53_serdes.c +++ b/drivers/net/dsa/b53/b53_serdes.c @@ -17,6 +17,11 @@ #include "b53_serdes.h" #include "b53_regs.h" +static inline struct b53_pcs *pcs_to_b53_pcs(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct b53_pcs, pcs); +} + static void b53_serdes_write_blk(struct b53_device *dev, u8 offset, u16 block, u16 value) { @@ -60,51 +65,47 @@ static u16 b53_serdes_read(struct b53_device *dev, u8 lane, return b53_serdes_read_blk(dev, offset, block); } -void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode, - const struct phylink_link_state *state) +static int b53_serdes_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 reg; - if (lane == B53_INVALID_LANE) - return; - reg = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_CONTROL(1), SERDES_DIGITAL_BLK); - if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + if (interface == PHY_INTERFACE_MODE_1000BASEX) reg |= FIBER_MODE_1000X; else reg &= ~FIBER_MODE_1000X; b53_serdes_write(dev, lane, B53_SERDES_DIGITAL_CONTROL(1), SERDES_DIGITAL_BLK, reg); + + return 0; } -EXPORT_SYMBOL(b53_serdes_config); -void b53_serdes_an_restart(struct b53_device *dev, int port) +static void b53_serdes_an_restart(struct phylink_pcs *pcs) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 reg; - if (lane == B53_INVALID_LANE) - return; - reg = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMCR), SERDES_MII_BLK); reg |= BMCR_ANRESTART; b53_serdes_write(dev, lane, B53_SERDES_MII_REG(MII_BMCR), SERDES_MII_BLK, reg); } -EXPORT_SYMBOL(b53_serdes_an_restart); -int b53_serdes_link_state(struct b53_device *dev, int port, - struct phylink_link_state *state) +static void b53_serdes_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 dig, bmsr; - if (lane == B53_INVALID_LANE) - return 1; - dig = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_STATUS, SERDES_DIGITAL_BLK); bmsr = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMSR), @@ -133,10 +134,7 @@ int b53_serdes_link_state(struct b53_device *dev, int port, state->pause |= MLO_PAUSE_RX; if (dig & PAUSE_RESOLUTION_TX_SIDE) state->pause |= MLO_PAUSE_TX; - - return 0; } -EXPORT_SYMBOL(b53_serdes_link_state); void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up) @@ -158,6 +156,12 @@ void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, } EXPORT_SYMBOL(b53_serdes_link_set); +static const struct phylink_pcs_ops b53_pcs_ops = { + .pcs_get_state = b53_serdes_get_state, + .pcs_config = b53_serdes_config, + .pcs_an_restart = b53_serdes_an_restart, +}; + void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, struct phylink_config *config) { @@ -187,9 +191,28 @@ void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, } EXPORT_SYMBOL(b53_serdes_phylink_get_caps); +struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev, + int port, + phy_interface_t interface) +{ + u8 lane = b53_serdes_map_lane(dev, port); + + if (lane == B53_INVALID_LANE || lane >= B53_N_PCS || + !dev->pcs[lane].dev) + return NULL; + + if (!phy_interface_mode_is_8023z(interface) && + interface != PHY_INTERFACE_MODE_SGMII) + return NULL; + + return &dev->pcs[lane].pcs; +} +EXPORT_SYMBOL(b53_serdes_phylink_mac_select_pcs); + int b53_serdes_init(struct b53_device *dev, int port) { u8 lane = b53_serdes_map_lane(dev, port); + struct b53_pcs *pcs; u16 id0, msb, lsb; if (lane == B53_INVALID_LANE) @@ -212,6 +235,11 @@ int b53_serdes_init(struct b53_device *dev, int port) (id0 >> SERDES_ID0_REV_NUM_SHIFT) & SERDES_ID0_REV_NUM_MASK, (u32)msb << 16 | lsb); + pcs = &dev->pcs[lane]; + pcs->dev = dev; + pcs->lane = lane; + pcs->pcs.ops = &b53_pcs_ops; + return 0; } EXPORT_SYMBOL(b53_serdes_init); diff --git a/drivers/net/dsa/b53/b53_serdes.h b/drivers/net/dsa/b53/b53_serdes.h index f47d5caa75576..ef81f5da5f81f 100644 --- a/drivers/net/dsa/b53/b53_serdes.h +++ b/drivers/net/dsa/b53/b53_serdes.h @@ -107,14 +107,11 @@ static inline u8 b53_serdes_map_lane(struct b53_device *dev, int port) return dev->ops->serdes_map_lane(dev, port); } -int b53_serdes_get_link(struct b53_device *dev, int port); -int b53_serdes_link_state(struct b53_device *dev, int port, - struct phylink_link_state *state); -void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode, - const struct phylink_link_state *state); -void b53_serdes_an_restart(struct b53_device *dev, int port); void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); +struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev, + int port, + phy_interface_t interface); void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, struct phylink_config *config); #if IS_ENABLED(CONFIG_B53_SERDES) diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index c51b716657db3..da0b889880f6a 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -491,10 +491,8 @@ static const struct b53_io_ops b53_srab_ops = { .irq_disable = b53_srab_irq_disable, .phylink_get_caps = b53_srab_phylink_get_caps, #if IS_ENABLED(CONFIG_B53_SERDES) + .phylink_mac_select_pcs = b53_serdes_phylink_mac_select_pcs, .serdes_map_lane = b53_srab_serdes_map_lane, - .serdes_link_state = b53_serdes_link_state, - .serdes_config = b53_serdes_config, - .serdes_an_restart = b53_serdes_an_restart, .serdes_link_set = b53_serdes_link_set, #endif }; From d3683eeb9d2b4aa5256f830721655ef2ee97e324 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 13 Apr 2022 21:29:18 +0200 Subject: [PATCH 109/491] pinctrl: ocelot: Fix for lan966x alt mode For lan966x, the GPIO 35 has the wrong function for alternate mode 2. The mode is not none but is PTP sync. Fixes: 531d6ab36571c2 ("pinctrl: ocelot: Extend support for lan966x") Signed-off-by: Horatiu Vultur Reviewed-by: Kavyasree Kotagiri Link: https://lore.kernel.org/r/20220413192918.3777234-1-horatiu.vultur@microchip.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ocelot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 003fb0e341537..6a956ee94494f 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -129,6 +129,7 @@ enum { FUNC_PTP1, FUNC_PTP2, FUNC_PTP3, + FUNC_PTPSYNC_0, FUNC_PTPSYNC_1, FUNC_PTPSYNC_2, FUNC_PTPSYNC_3, @@ -252,6 +253,7 @@ static const char *const ocelot_function_names[] = { [FUNC_PTP1] = "ptp1", [FUNC_PTP2] = "ptp2", [FUNC_PTP3] = "ptp3", + [FUNC_PTPSYNC_0] = "ptpsync_0", [FUNC_PTPSYNC_1] = "ptpsync_1", [FUNC_PTPSYNC_2] = "ptpsync_2", [FUNC_PTPSYNC_3] = "ptpsync_3", @@ -983,7 +985,7 @@ LAN966X_P(31, GPIO, FC3_c, CAN1, NONE, OB_TRG, RECO_b, NON LAN966X_P(32, GPIO, FC3_c, NONE, SGPIO_a, NONE, MIIM_Sa, NONE, R); LAN966X_P(33, GPIO, FC1_b, NONE, SGPIO_a, NONE, MIIM_Sa, MIIM_b, R); LAN966X_P(34, GPIO, FC1_b, NONE, SGPIO_a, NONE, MIIM_Sa, MIIM_b, R); -LAN966X_P(35, GPIO, FC1_b, NONE, SGPIO_a, CAN0_b, NONE, NONE, R); +LAN966X_P(35, GPIO, FC1_b, PTPSYNC_0, SGPIO_a, CAN0_b, NONE, NONE, R); LAN966X_P(36, GPIO, NONE, PTPSYNC_1, NONE, CAN0_b, NONE, NONE, R); LAN966X_P(37, GPIO, FC_SHRD0, PTPSYNC_2, TWI_SLC_GATE_AD, NONE, NONE, NONE, R); LAN966X_P(38, GPIO, NONE, PTPSYNC_3, NONE, NONE, NONE, NONE, R); From f680058f406863b55ac226d1c157701939c63db4 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Tue, 26 Apr 2022 14:57:14 +0200 Subject: [PATCH 110/491] pinctrl: mediatek: mt8365: fix IES control pins IES26 (BIT 16 of IES1_CFG_ADDR) controls the following pads: - PAD_I2S_DATA_IN (GPIO114) - PAD_I2S_LRCK (GPIO115) - PAD_I2S_BCK (GPIO116) The pinctrl table is wrong since it lists pins 114 to 112. Update the table with the correct values. Fixes: e94d8b6fb83a ("pinctrl: mediatek: add support for mt8365 SoC") Reported-by: Youngmin Han Signed-off-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20220426125714.298907-1-mkorpershoek@baylibre.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mt8365.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c index 727c65221aef9..57f37a294063c 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c @@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = { MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13), MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14), MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15), - MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16), + MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16), MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17), MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18), MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19), From e75f88efac05bf4e107e4171d8db6d8c3937252d Mon Sep 17 00:00:00 2001 From: Andrei Lalaev Date: Fri, 15 Apr 2022 10:07:11 +0300 Subject: [PATCH 111/491] gpiolib: of: fix bounds check for 'gpio-reserved-ranges' Gpiolib interprets the elements of "gpio-reserved-ranges" as "start,size" because it clears "size" bits starting from the "start" bit in the according bitmap. So it has to use "greater" instead of "greater or equal" when performs bounds check to make sure that GPIOs are in the available range. Previous implementation skipped ranges that include the last GPIO in the range. I wrote the mail to the maintainers (https://lore.kernel.org/linux-gpio/20220412115554.159435-1-andrei.lalaev@emlid.com/T/#u) of the questioned DTSes (because I couldn't understand how the maintainers interpreted this property), but I haven't received a response. Since the questioned DTSes use "gpio-reserved-ranges = <0 4>" (i.e., the beginning of the range), this patch doesn't affect these DTSes at all. TBH this patch doesn't break any existing DTSes because none of them reserve gpios at the end of range. Fixes: 726cb3ba4969 ("gpiolib: Support 'gpio-reserved-ranges' property") Signed-off-by: Andrei Lalaev Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ae1ce319cd78e..7e5e51d49d09e 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -910,7 +910,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip) i, &start); of_property_read_u32_index(np, "gpio-reserved-ranges", i + 1, &count); - if (start >= chip->ngpio || start + count >= chip->ngpio) + if (start >= chip->ngpio || start + count > chip->ngpio) continue; bitmap_clear(chip->valid_mask, start, count); From e5f6e5d554ac274f9c8ba60078103d0425b93c19 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 11 Apr 2022 09:23:40 +0300 Subject: [PATCH 112/491] gpio: mvebu: drop pwm base assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwmchip_add() unconditionally assigns the base ID dynamically. Commit f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically") dropped all base assignment from drivers under drivers/pwm/. It missed this driver. Fix that. Fixes: f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically") Signed-off-by: Baruch Siach Reviewed-by: Uwe Kleine-König Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mvebu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 4c1f9e1091b7f..a2c8dd329b31b 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -871,13 +871,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev, mvpwm->chip.dev = dev; mvpwm->chip.ops = &mvebu_pwm_ops; mvpwm->chip.npwm = mvchip->chip.ngpio; - /* - * There may already be some PWM allocated, so we can't force - * mvpwm->chip.base to a fixed point like mvchip->chip.base. - * So, we let pwmchip_add() do the numbering and take the next free - * region. - */ - mvpwm->chip.base = -1; spin_lock_init(&mvpwm->lock); From a196c78b5443fc61af2c0490213b9d125482cbd1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 1 May 2022 21:19:50 -0600 Subject: [PATCH 113/491] io_uring: assign non-fixed early for async work We defer file assignment to ensure that fixed files work with links between a direct accept/open and the links that follow it. But this has the side effect that normal file assignment is then not complete by the time that request submission has been done. For deferred execution, if the file is a regular file, assign it when we do the async prep anyway. Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e01f595f5b7d7..91de361ea9aba 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6947,7 +6947,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_req_prep_async(struct io_kiocb *req) { - if (!io_op_defs[req->opcode].needs_async_setup) + const struct io_op_def *def = &io_op_defs[req->opcode]; + + /* assign early for deferred execution for non-fixed file */ + if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) + req->file = io_file_get_normal(req, req->fd); + if (!def->needs_async_setup) return 0; if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; From 7b8943b821bafab492f43aafbd006b57c6b65845 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 25 Apr 2022 13:17:01 -0500 Subject: [PATCH 114/491] RDMA/irdma: Flush iWARP QP if modified to ERR from RTR state When connection establishment fails in iWARP mode, an app can drain the QPs and hang because flush isn't issued when the QP is modified from RTR state to error. Issue a flush in this case using function irdma_cm_disconn(). Update irdma_cm_disconn() to do flush when cm_id is NULL, which is the case when the QP is in RTR state and there is an error in the connection establishment. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20220425181703.1634-2-shiraz.saleem@intel.com Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 16 +++++----------- drivers/infiniband/hw/irdma/verbs.c | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index a98d962e5efb6..90b4113e7071a 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3462,12 +3462,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) } cm_id = iwqp->cm_id; - /* make sure we havent already closed this connection */ - if (!cm_id) { - spin_unlock_irqrestore(&iwqp->lock, flags); - return; - } - original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; @@ -3489,11 +3483,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) disconn_status = -ECONNRESET; } - if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || - original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || - last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || - last_ae == IRDMA_AE_BAD_CLOSE || - last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) { + if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || + original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || + last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || + last_ae == IRDMA_AE_BAD_CLOSE || + last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 46f475394af5f..52f3e88f85695 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { - if (iwqp->cm_id && iwqp->hw_tcp_state) { + if (iwqp->hw_tcp_state) { spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->last_aeq = IRDMA_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_cm_disconn(iwqp); } + irdma_cm_disconn(iwqp); } else { int close_timer_started; From 2df6d895907b2f5dfbc558cbff7801bba82cb3cc Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 25 Apr 2022 13:17:02 -0500 Subject: [PATCH 115/491] RDMA/irdma: Reduce iWARP QP destroy time QP destroy is synchronous and waits for its refcnt to be decremented in irdma_cm_node_free_cb (for iWARP) which fires after the RCU grace period elapses. Applications running a large number of connections are exposed to high wait times on destroy QP for events like SIGABORT. The long pole for this wait time is the firing of the call_rcu callback during a CM node destroy which can be slow. It holds the QP reference count and blocks the destroy QP from completing. call_rcu only needs to make sure that list walkers have a reference to the cm_node object before freeing it and thus need to wait for grace period elapse. The rest of the connection teardown in irdma_cm_node_free_cb is moved out of the grace period wait in irdma_destroy_connection. Also, replace call_rcu with a simple kfree_rcu as it just needs to do a kfree on the cm_node Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Link: https://lore.kernel.org/r/20220425181703.1634-3-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 90b4113e7071a..638bf4a1ed946 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -2308,10 +2308,8 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, return NULL; } -static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) +static void irdma_destroy_connection(struct irdma_cm_node *cm_node) { - struct irdma_cm_node *cm_node = - container_of(rcu_head, struct irdma_cm_node, rcu_head); struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; @@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) } cm_core->cm_free_ah(cm_node); - kfree(cm_node); } /** @@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) spin_unlock_irqrestore(&cm_core->ht_lock, flags); - /* wait for all list walkers to exit their grace period */ - call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb); + irdma_destroy_connection(cm_node); + + kfree_rcu(cm_node, rcu_head); } /** From 1c9043ae0667a43bd87beeebbdd4bed674713629 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 25 Apr 2022 13:17:03 -0500 Subject: [PATCH 116/491] RDMA/irdma: Fix possible crash due to NULL netdev in notifier For some net events in irdma_net_event notifier, the netdev can be NULL which will cause a crash in rdma_vlan_dev_real_dev. Fix this by moving all processing to the NETEVENT_NEIGH_UPDATE case where the netdev is guaranteed to not be NULL. Fixes: 6702bc147448 ("RDMA/irdma: Fix netdev notifications for vlan's") Link: https://lore.kernel.org/r/20220425181703.1634-4-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/utils.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 346c2c5dabdf0..81760415d66c8 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, u32 local_ipaddr[4] = {}; bool ipv4 = true; - real_dev = rdma_vlan_dev_real_dev(netdev); - if (!real_dev) - real_dev = netdev; - - ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); - if (!ibdev) - return NOTIFY_DONE; - - iwdev = to_iwdev(ibdev); - switch (event) { case NETEVENT_NEIGH_UPDATE: + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); + if (!ibdev) + return NOTIFY_DONE; + + iwdev = to_iwdev(ibdev); p = (__be32 *)neigh->primary_key; if (neigh->tbl->family == AF_INET6) { ipv4 = false; @@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, irdma_manage_arp_cache(iwdev->rf, neigh->ha, local_ipaddr, ipv4, IRDMA_ARP_DELETE); + ib_device_put(ibdev); break; default: break; } - ib_device_put(ibdev); - return NOTIFY_DONE; } From 285d5731a0cb2dd3a12ddf34d67be4e4965e64da Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Apr 2022 10:49:36 +0800 Subject: [PATCH 117/491] Revert "block: release rq qos structures for queue without disk" This reverts commit daaca3522a8e67c46e39ef09c1d542e866f85f3b. Commit daaca3522a8e ("block: release rq qos structures for queue without disk") is only needed for v5.15~v5.17, and isn't needed for v5.18, so revert it. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220426024936.3321341-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 937bb6b863317..bc05067721523 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -50,7 +50,6 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" -#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - /* cleanup rq qos structures for queue without disk */ - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); From b5d1274409d0eec6d826f65d6dafebf9d77a1b99 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Mar 2022 18:00:40 +0100 Subject: [PATCH 118/491] KVM: s390: Fix lockdep issue in vm memop Issuing a memop on a protected vm does not make sense, neither is the memory readable/writable, nor does it make sense to check storage keys. This is why the ioctl will return -EINVAL when it detects the vm to be protected. However, in order to ensure that the vm cannot become protected during the memop, the kvm->lock would need to be taken for the duration of the ioctl. This is also required because kvm_s390_pv_is_protected asserts that the lock must be held. Instead, don't try to prevent this. If user space enables secure execution concurrently with a memop it must accecpt the possibility of the memop failing. Still check if the vm is currently protected, but without locking and consider it a heuristic. Fixes: ef11c9463ae0 ("KVM: s390: Add vm IOCTL for key checked guest absolute memory access") Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220322153204.2637400-1-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kvm/kvm-s390.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index da3dabda1a126..76ad6408cb2cd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2384,7 +2384,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; - if (kvm_s390_pv_is_protected(kvm)) + /* + * This is technically a heuristic only, if the kvm->lock is not + * taken, it is not guaranteed that the vm is/remains non-protected. + * This is ok from a kernel perspective, wrongdoing is detected + * on the access, -EFAULT is returned and the vm may crash the + * next time it accesses the memory in question. + * There is no sane usecase to do switching and a memop on two + * different CPUs at the same time. + */ + if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { if (access_key_invalid(mop->key)) From 6056a92ceb2a7705d61df7ec5370548e96aee258 Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Mon, 2 May 2022 08:09:17 -0700 Subject: [PATCH 119/491] scsi: scsi_dh_alua: Properly handle the ALUA transitioning state The handling of the ALUA transitioning state is currently broken. When a target goes into this state, it is expected that the target is allowed to stay in this state for the implicit transition timeout without a path failure. The handler has this logic, but it gets skipped currently. When the target transitions, there is in-flight I/O from the initiator. The first of these responses from the target will be a unit attention letting the initiator know that the ALUA state has changed. The remaining in-flight I/Os, before the initiator finds out that the portal state has changed, will return not ready, ALUA state is transitioning. The portal state will change to SCSI_ACCESS_STATE_TRANSITIONING. This will lead to all new I/O immediately failing the path unexpectedly. The path failure happens in less than a second instead of the expected successes until the transition timer is exceeded. Allow I/Os to continue while the path is in the ALUA transitioning state. The handler already takes care of a target that stays in the transitioning state for too long by changing the state to ALUA state standby once the transition timeout is exceeded at which point the path will fail. Link: https://lore.kernel.org/r/CAHZQxy+4sTPz9+pY3=7VJH+CLUJsDct81KtnR2be8ycN5mhqTg@mail.gmail.com Reviewed-by: Hannes Reinecke Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 37d06f993b761..1d9be771f3ee0 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) case SCSI_ACCESS_STATE_OPTIMAL: case SCSI_ACCESS_STATE_ACTIVE: case SCSI_ACCESS_STATE_LBA: - return BLK_STS_OK; case SCSI_ACCESS_STATE_TRANSITIONING: - return BLK_STS_AGAIN; + return BLK_STS_OK; default: req->rq_flags |= RQF_QUIET; return BLK_STS_IOERR; From 26f9ce53817a8fd84b69a73473a7de852a24c897 Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Fri, 15 Apr 2022 12:42:29 +0000 Subject: [PATCH 120/491] scsi: qla2xxx: Fix missed DMA unmap for aborted commands Aborting commands that have already been sent to the firmware can cause BUG in qlt_free_cmd(): BUG_ON(cmd->sg_mapped) For instance: - Command passes rdx_to_xfer state, maps sgl, sends to the firmware - Reset occurs, qla2xxx performs ISP error recovery, aborts the command - Target stack calls qlt_abort_cmd() and then qlt_free_cmd() - BUG_ON(cmd->sg_mapped) in qlt_free_cmd() occurs because sgl was not unmapped Thus, unmap sgl in qlt_abort_cmd() for commands with the aborted flag set. Link: https://lore.kernel.org/r/AS8PR10MB4952D545F84B6B1DFD39EC1E9DEE9@AS8PR10MB4952.EURPRD10.PROD.OUTLOOK.COM Reviewed-by: Himanshu Madhani Signed-off-by: Gleb Chesnokov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 85dbf81f3204a..6dfcfd8e73371 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3826,6 +3826,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: From b800528b97d0adc3a5ba42d78a8b0d3f07a31f44 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Mon, 2 May 2022 12:57:49 +0530 Subject: [PATCH 121/491] net: emaclite: Don't advertise 1000BASE-T and do auto negotiation In xemaclite_open() function we are setting the max speed of emaclite to 100Mb using phy_set_max_speed() function so, there is no need to write the advertising registers to stop giga-bit speed and the phy_start() function starts the auto-negotiation so, there is no need to handle it separately using advertising registers. Remove the phy_read and phy_write of advertising registers in xemaclite_open() function. Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 57a24f62e353d..f7394a5160cfa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -926,8 +926,6 @@ static int xemaclite_open(struct net_device *dev) xemaclite_disable_interrupts(lp); if (lp->phy_node) { - u32 bmcr; - lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, xemaclite_adjust_link, 0, PHY_INTERFACE_MODE_MII); @@ -938,19 +936,6 @@ static int xemaclite_open(struct net_device *dev) /* EmacLite doesn't support giga-bit speeds */ phy_set_max_speed(lp->phy_dev, SPEED_100); - - /* Don't advertise 1000BASE-T Full/Half duplex speeds */ - phy_write(lp->phy_dev, MII_CTRL1000, 0); - - /* Advertise only 10 and 100mbps full/half duplex speeds */ - phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL | - ADVERTISE_CSMA); - - /* Restart auto negotiation */ - bmcr = phy_read(lp->phy_dev, MII_BMCR); - bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); - phy_write(lp->phy_dev, MII_BMCR, bmcr); - phy_start(lp->phy_dev); } From 7a6bc33ab54923d325d9a1747ec9652c4361ebd1 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Mon, 2 May 2022 12:57:50 +0530 Subject: [PATCH 122/491] net: emaclite: Add error handling for of_address_to_resource() check the return value of of_address_to_resource() and also add missing of_node_put() for np and npp nodes. Fixes: e0a3bc65448c ("net: emaclite: Support multiple phys connected to one MDIO bus") Addresses-Coverity: Event check_return value. Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index f7394a5160cfa..d770b3ac3f74f 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -823,10 +823,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) { struct mii_bus *bus; - int rc; struct resource res; struct device_node *np = of_get_parent(lp->phy_node); struct device_node *npp; + int rc, ret; /* Don't register the MDIO bus if the phy_node or its parent node * can't be found. @@ -836,8 +836,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) return -ENODEV; } npp = of_get_parent(np); - - of_address_to_resource(npp, 0, &res); + ret = of_address_to_resource(npp, 0, &res); + of_node_put(npp); + if (ret) { + dev_err(dev, "%s resource error!\n", + dev->of_node->full_name); + of_node_put(np); + return ret; + } if (lp->ndev->mem_start != res.start) { struct phy_device *phydev; phydev = of_phy_find_device(lp->phy_node); @@ -846,6 +852,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) "MDIO of the phy is not registered yet\n"); else put_device(&phydev->mdio.dev); + of_node_put(np); return 0; } @@ -858,6 +865,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus = mdiobus_alloc(); if (!bus) { dev_err(dev, "Failed to allocate mdiobus\n"); + of_node_put(np); return -ENOMEM; } @@ -870,6 +878,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus->parent = dev; rc = of_mdiobus_register(bus, np); + of_node_put(np); if (rc) { dev_err(dev, "Failed to register mdio bus.\n"); goto err_register; From 3122257c02afd9f199a8fc84ae981e1fc4958532 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 2 May 2022 11:45:07 +0300 Subject: [PATCH 123/491] selftests: mirror_gre_bridge_1q: Avoid changing PVID while interface is operational In emulated environments, the bridge ports enslaved to br1 get a carrier before changing br1's PVID. This means that by the time the PVID is changed, br1 is already operational and configured with an IPv6 link-local address. When the test is run with netdevs registered by mlxsw, changing the PVID is vetoed, as changing the VID associated with an existing L3 interface is forbidden. This restriction is similar to the 8021q driver's restriction of changing the VID of an existing interface. Fix this by taking br1 down and bringing it back up when it is fully configured. With this fix, the test reliably passes on top of both the SW and HW data paths (emulated or not). Fixes: 239e754af854 ("selftests: forwarding: Test mirror-to-gretap w/ UL 802.1q") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20220502084507.364774-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index a3402cd8d5b68..9ff22f28032dd 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -61,9 +61,12 @@ setup_prepare() vrf_prepare mirror_gre_topo_create + # Avoid changing br1's PVID while it is operational as a L3 interface. + ip link set dev br1 down ip link set dev $swp3 master br1 bridge vlan add dev br1 vid 555 pvid untagged self + ip link set dev br1 up ip address add dev br1 192.0.2.129/28 ip address add dev br1 2001:db8:2::1/64 From 97926d5a847ca1758ad8702ce591e3b05a701e0d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 2 May 2022 11:46:37 +0200 Subject: [PATCH 124/491] selftests/net: so_txtime: fix parsing of start time stamp on 32 bit systems This patch fixes the parsing of the cmd line supplied start time on 32 bit systems. A "long" on 32 bit systems is only 32 bit wide and cannot hold a timestamp in nano second resolution. Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support") Cc: Carlos Llamas Cc: Willem de Bruijn Signed-off-by: Marc Kleine-Budde Acked-by: Willem de Bruijn Reviewed-by: Carlos Llamas Link: https://lore.kernel.org/r/20220502094638.1921702-2-mkl@pengutronix.de Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_txtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 59067f64b7753..103f6bf28e355 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv) cfg_rx = true; break; case 't': - cfg_start_time_ns = strtol(optarg, NULL, 0); + cfg_start_time_ns = strtoll(optarg, NULL, 0); break; case 'm': cfg_mark = strtol(optarg, NULL, 0); From f5c2174a3775491e890ce285df52f5715fbef875 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 2 May 2022 11:46:38 +0200 Subject: [PATCH 125/491] selftests/net: so_txtime: usage(): fix documentation of default clock The program uses CLOCK_TAI as default clock since it was added to the Linux repo. In commit: | 040806343bb4 ("selftests/net: so_txtime multi-host support") a help text stating the wrong default clock was added. This patch fixes the help text. Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support") Cc: Carlos Llamas Cc: Willem de Bruijn Signed-off-by: Marc Kleine-Budde Acked-by: Willem de Bruijn Reviewed-by: Carlos Llamas Link: https://lore.kernel.org/r/20220502094638.1921702-3-mkl@pengutronix.de Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_txtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 103f6bf28e355..2672ac0b6d1f3 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -421,7 +421,7 @@ static void usage(const char *progname) "Options:\n" " -4 only IPv4\n" " -6 only IPv6\n" - " -c monotonic (default) or tai\n" + " -c monotonic or tai (default)\n" " -D destination IP address (server)\n" " -S source IP address (client)\n" " -r run rx mode\n" From 706c9c55e5a32800605eb6a864ef6e1ca0c6c179 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:41 +0000 Subject: [PATCH 126/491] KVM: x86/mmu: Don't treat fully writable SPTEs as volatile (modulo A/D) Don't treat SPTEs that are truly writable, i.e. writable in hardware, as being volatile (unless they're volatile for other reasons, e.g. A/D bits). KVM _sets_ the WRITABLE bit out of mmu_lock, but never _clears_ the bit out of mmu_lock, so if the WRITABLE bit is set, it cannot magically get cleared just because the SPTE is MMU-writable. Rename the wrapper of MMU-writable to be more literal, the previous name of spte_can_locklessly_be_made_writable() is wrong and misleading. Fixes: c7ba5b48cc8d ("KVM: MMU: fast path of handling guest page fault") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 +++++++++-------- arch/x86/kvm/mmu/spte.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 64a2a7e2be904..48dcb6a782f43 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -484,13 +484,15 @@ static bool spte_has_volatile_bits(u64 spte) * also, it can help us to get a stable is_writable_pte() * to ensure tlb flush is not missed. */ - if (spte_can_locklessly_be_made_writable(spte) || - is_access_track_spte(spte)) + if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) + return true; + + if (is_access_track_spte(spte)) return true; if (spte_ad_enabled(spte)) { - if ((spte & shadow_accessed_mask) == 0 || - (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) + if (!(spte & shadow_accessed_mask) || + (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) return true; } @@ -557,7 +559,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * we always atomically update it, see the comments in * spte_has_volatile_bits(). */ - if (spte_can_locklessly_be_made_writable(old_spte) && + if (is_mmu_writable_spte(old_spte) && !is_writable_pte(new_spte)) flush = true; @@ -1187,7 +1189,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) u64 spte = *sptep; if (!is_writable_pte(spte) && - !(pt_protect && spte_can_locklessly_be_made_writable(spte))) + !(pt_protect && is_mmu_writable_spte(spte))) return false; rmap_printk("spte %p %llx\n", sptep, *sptep); @@ -3196,8 +3198,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) * be removed in the fast path only if the SPTE was * write-protected for dirty-logging or access tracking. */ - if (fault->write && - spte_can_locklessly_be_made_writable(spte)) { + if (fault->write && is_mmu_writable_spte(spte)) { new_spte |= PT_WRITABLE_MASK; /* diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index e4abeb5df1b13..c571784cb567e 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -390,7 +390,7 @@ static inline void check_spte_writable_invariants(u64 spte) "kvm: Writable SPTE is not MMU-writable: %llx", spte); } -static inline bool spte_can_locklessly_be_made_writable(u64 spte) +static inline bool is_mmu_writable_spte(u64 spte) { return spte & shadow_mmu_writable_mask; } From 54eb3ef56f36827aad90915df33387d4c2b5df5a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:42 +0000 Subject: [PATCH 127/491] KVM: x86/mmu: Move shadow-present check out of spte_has_volatile_bits() Move the is_shadow_present_pte() check out of spte_has_volatile_bits() and into its callers. Well, caller, since only one of its two callers doesn't already do the shadow-present check. Opportunistically move the helper to spte.c/h so that it can be used by the TDP MMU, which is also the primary motivation for the shadow-present change. Unlike the legacy MMU, the TDP MMU uses a single path for clear leaf and non-leaf SPTEs, and to avoid unnecessary atomic updates, the TDP MMU will need to check is_last_spte() prior to calling spte_has_volatile_bits(), and calling is_last_spte() without first calling is_shadow_present_spte() is at best odd, and at worst a violation of KVM's loosely defines SPTE rules. Note, mmu_spte_clear_track_bits() could likely skip the write entirely for SPTEs that are not shadow-present. Leave that cleanup for a future patch to avoid introducing a functional change, and because the shadow-present check can likely be moved further up the stack, e.g. drop_large_spte() appears to be the only path that doesn't already explicitly check for a shadow-present SPTE. No functional change intended. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 29 ++--------------------------- arch/x86/kvm/mmu/spte.c | 28 ++++++++++++++++++++++++++++ arch/x86/kvm/mmu/spte.h | 2 ++ 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 48dcb6a782f43..311e4e1d7870e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -473,32 +473,6 @@ static u64 __get_spte_lockless(u64 *sptep) } #endif -static bool spte_has_volatile_bits(u64 spte) -{ - if (!is_shadow_present_pte(spte)) - return false; - - /* - * Always atomically update spte if it can be updated - * out of mmu-lock, it can ensure dirty bit is not lost, - * also, it can help us to get a stable is_writable_pte() - * to ensure tlb flush is not missed. - */ - if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) - return true; - - if (is_access_track_spte(spte)) - return true; - - if (spte_ad_enabled(spte)) { - if (!(spte & shadow_accessed_mask) || - (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) - return true; - } - - return false; -} - /* Rules for using mmu_spte_set: * Set the sptep from nonpresent to present. * Note: the sptep being assigned *must* be either not present @@ -593,7 +567,8 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) u64 old_spte = *sptep; int level = sptep_to_sp(sptep)->role.level; - if (!spte_has_volatile_bits(old_spte)) + if (!is_shadow_present_pte(old_spte) || + !spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, 0ull); else old_spte = __update_clear_spte_slow(sptep, 0ull); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 4739b53c9734d..e5c0b6db6f2ca 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -90,6 +90,34 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) E820_TYPE_RAM); } +/* + * Returns true if the SPTE has bits that may be set without holding mmu_lock. + * The caller is responsible for checking if the SPTE is shadow-present, and + * for determining whether or not the caller cares about non-leaf SPTEs. + */ +bool spte_has_volatile_bits(u64 spte) +{ + /* + * Always atomically update spte if it can be updated + * out of mmu-lock, it can ensure dirty bit is not lost, + * also, it can help us to get a stable is_writable_pte() + * to ensure tlb flush is not missed. + */ + if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) + return true; + + if (is_access_track_spte(spte)) + return true; + + if (spte_ad_enabled(spte)) { + if (!(spte & shadow_accessed_mask) || + (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) + return true; + } + + return false; +} + bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index c571784cb567e..80ab0f5cff01b 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -404,6 +404,8 @@ static inline u64 get_mmio_spte_generation(u64 spte) return gen; } +bool spte_has_volatile_bits(u64 spte); + bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, From ba3a6120a4e7efc13d19fe43eb6c5caf1da05b72 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:43 +0000 Subject: [PATCH 128/491] KVM: x86/mmu: Use atomic XCHG to write TDP MMU SPTEs with volatile bits Use an atomic XCHG to write TDP MMU SPTEs that have volatile bits, even if mmu_lock is held for write, as volatile SPTEs can be written by other tasks/vCPUs outside of mmu_lock. If a vCPU uses the to-be-modified SPTE to write a page, the CPU can cache the translation as WRITABLE in the TLB despite it being seen by KVM as !WRITABLE, and/or KVM can clobber the Accessed/Dirty bits and not properly tag the backing page. Exempt non-leaf SPTEs from atomic updates as KVM itself doesn't modify non-leaf SPTEs without holding mmu_lock, they do not have Dirty bits, and KVM doesn't consume the Accessed bit of non-leaf SPTEs. Dropping the Dirty and/or Writable bits is most problematic for dirty logging, as doing so can result in a missed TLB flush and eventually a missed dirty page. In the unlikely event that the only dirty page(s) is a clobbered SPTE, clear_dirty_gfn_range() will see the SPTE as not dirty (based on the Dirty or Writable bit depending on the method) and so not update the SPTE and ultimately not flush. If the SPTE is cached in the TLB as writable before it is clobbered, the guest can continue writing the associated page without ever taking a write-protect fault. For most (all?) file back memory, dropping the Dirty bit is a non-issue. The primary MMU write-protects its PTEs on writeback, i.e. KVM's dirty bit is effectively ignored because the primary MMU will mark that page dirty when the write-protection is lifted, e.g. when KVM faults the page back in for write. The Accessed bit is a complete non-issue. Aside from being unused for non-leaf SPTEs, KVM doesn't do a TLB flush when aging SPTEs, i.e. the Accessed bit may be dropped anyways. Lastly, the Writable bit is also problematic as an extension of the Dirty bit, as KVM (correctly) treats the Dirty bit as volatile iff the SPTE is !DIRTY && WRITABLE. If KVM fixes an MMU-writable, but !WRITABLE, SPTE out of mmu_lock, then it can allow the CPU to set the Dirty bit despite the SPTE being !WRITABLE when it is checked by KVM. But that all depends on the Dirty bit being problematic in the first place. Fixes: 2f2fad0897cb ("kvm: x86/mmu: Add functions to handle changed TDP SPTEs") Cc: stable@vger.kernel.org Cc: Ben Gardon Cc: David Matlack Cc: Venkatesh Srinivas Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_iter.h | 34 ++++++++++++++- arch/x86/kvm/mmu/tdp_mmu.c | 82 ++++++++++++++++++++++++------------- 2 files changed, 85 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index b1eaf6ec0e0b1..f0af385c56e03 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -6,6 +6,7 @@ #include #include "mmu.h" +#include "spte.h" /* * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs) @@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep) { return READ_ONCE(*rcu_dereference(sptep)); } -static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val) + +static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte) +{ + return xchg(rcu_dereference(sptep), new_spte); +} + +static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte) +{ + WRITE_ONCE(*rcu_dereference(sptep), new_spte); +} + +static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, + u64 new_spte, int level) { - WRITE_ONCE(*rcu_dereference(sptep), val); + /* + * Atomically write the SPTE if it is a shadow-present, leaf SPTE with + * volatile bits, i.e. has bits that can be set outside of mmu_lock. + * The Writable bit can be set by KVM's fast page fault handler, and + * Accessed and Dirty bits can be set by the CPU. + * + * Note, non-leaf SPTEs do have Accessed bits and those bits are + * technically volatile, but KVM doesn't consume the Accessed bit of + * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This + * logic needs to be reassessed if KVM were to use non-leaf Accessed + * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs. + */ + if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) && + spte_has_volatile_bits(old_spte)) + return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte); + + __kvm_tdp_mmu_write_spte(sptep, new_spte); + return old_spte; } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index edc68538819bc..922b06bf4b948 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -426,9 +426,9 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) tdp_mmu_unlink_sp(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - u64 *sptep = rcu_dereference(pt) + i; + tdp_ptep_t sptep = pt + i; gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); - u64 old_child_spte; + u64 old_spte; if (shared) { /* @@ -440,8 +440,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) * value to the removed SPTE value. */ for (;;) { - old_child_spte = xchg(sptep, REMOVED_SPTE); - if (!is_removed_spte(old_child_spte)) + old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, REMOVED_SPTE); + if (!is_removed_spte(old_spte)) break; cpu_relax(); } @@ -455,23 +455,43 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) * are guarded by the memslots generation, not by being * unreachable. */ - old_child_spte = READ_ONCE(*sptep); - if (!is_shadow_present_pte(old_child_spte)) + old_spte = kvm_tdp_mmu_read_spte(sptep); + if (!is_shadow_present_pte(old_spte)) continue; /* - * Marking the SPTE as a removed SPTE is not - * strictly necessary here as the MMU lock will - * stop other threads from concurrently modifying - * this SPTE. Using the removed SPTE value keeps - * the two branches consistent and simplifies - * the function. + * Use the common helper instead of a raw WRITE_ONCE as + * the SPTE needs to be updated atomically if it can be + * modified by a different vCPU outside of mmu_lock. + * Even though the parent SPTE is !PRESENT, the TLB + * hasn't yet been flushed, and both Intel and AMD + * document that A/D assists can use upper-level PxE + * entries that are cached in the TLB, i.e. the CPU can + * still access the page and mark it dirty. + * + * No retry is needed in the atomic update path as the + * sole concern is dropping a Dirty bit, i.e. no other + * task can zap/remove the SPTE as mmu_lock is held for + * write. Marking the SPTE as a removed SPTE is not + * strictly necessary for the same reason, but using + * the remove SPTE value keeps the shared/exclusive + * paths consistent and allows the handle_changed_spte() + * call below to hardcode the new value to REMOVED_SPTE. + * + * Note, even though dropping a Dirty bit is the only + * scenario where a non-atomic update could result in a + * functional bug, simply checking the Dirty bit isn't + * sufficient as a fast page fault could read the upper + * level SPTE before it is zapped, and then make this + * target SPTE writable, resume the guest, and set the + * Dirty bit between reading the SPTE above and writing + * it here. */ - WRITE_ONCE(*sptep, REMOVED_SPTE); + old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, + REMOVED_SPTE, level); } handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, - old_child_spte, REMOVED_SPTE, level, - shared); + old_spte, REMOVED_SPTE, level, shared); } call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); @@ -667,14 +687,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, KVM_PAGES_PER_HPAGE(iter->level)); /* - * No other thread can overwrite the removed SPTE as they - * must either wait on the MMU lock or use - * tdp_mmu_set_spte_atomic which will not overwrite the - * special removed SPTE value. No bookkeeping is needed - * here since the SPTE is going from non-present - * to non-present. + * No other thread can overwrite the removed SPTE as they must either + * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not + * overwrite the special removed SPTE value. No bookkeeping is needed + * here since the SPTE is going from non-present to non-present. Use + * the raw write helper to avoid an unnecessary check on volatile bits. */ - kvm_tdp_mmu_write_spte(iter->sptep, 0); + __kvm_tdp_mmu_write_spte(iter->sptep, 0); return 0; } @@ -699,10 +718,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, * unless performing certain dirty logging operations. * Leaving record_dirty_log unset in that case prevents page * writes from being double counted. + * + * Returns the old SPTE value, which _may_ be different than @old_spte if the + * SPTE had voldatile bits. */ -static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, - u64 old_spte, u64 new_spte, gfn_t gfn, int level, - bool record_acc_track, bool record_dirty_log) +static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, + u64 old_spte, u64 new_spte, gfn_t gfn, int level, + bool record_acc_track, bool record_dirty_log) { lockdep_assert_held_write(&kvm->mmu_lock); @@ -715,7 +737,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, */ WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte)); - kvm_tdp_mmu_write_spte(sptep, new_spte); + old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false); @@ -724,6 +746,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, if (record_dirty_log) handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, new_spte, level); + return old_spte; } static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, @@ -732,9 +755,10 @@ static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, { WARN_ON_ONCE(iter->yielded); - __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, iter->old_spte, - new_spte, iter->gfn, iter->level, - record_acc_track, record_dirty_log); + iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, + iter->old_spte, new_spte, + iter->gfn, iter->level, + record_acc_track, record_dirty_log); } static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, From 3a58f13a881ed351198ffab4cf9953cf19d2ab3a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 2 May 2022 10:40:18 +0900 Subject: [PATCH 129/491] net: rds: acquire refcount on TCP sockets syzbot is reporting use-after-free read in tcp_retransmit_timer() [1], for TCP socket used by RDS is accessing sock_net() without acquiring a refcount on net namespace. Since TCP's retransmission can happen after a process which created net namespace terminated, we need to explicitly acquire a refcount. Link: https://syzkaller.appspot.com/bug?extid=694120e1002c117747ed [1] Reported-by: syzbot Fixes: 26abe14379f8e2fa ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f03661 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Signed-off-by: Tetsuo Handa Tested-by: syzbot Link: https://lore.kernel.org/r/a5fb1fc4-2284-3359-f6a0-e4e390239d7b@I-love.SAKURA.ne.jp Signed-off-by: Paolo Abeni --- net/rds/tcp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5327d130c4b56..2f638f8b7b1e7 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -495,6 +495,14 @@ void rds_tcp_tune(struct socket *sock) tcp_sock_set_nodelay(sock->sk); lock_sock(sk); + /* TCP timer functions might access net namespace even after + * a process which created this net namespace terminated. + */ + if (!sk->sk_net_refcnt) { + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + } if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; From 5eb849322d7f7ae9d5c587c7bc3b4f7c6872cd2f Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 2 May 2022 22:01:36 -0700 Subject: [PATCH 130/491] KVM: x86/svm: Account for family 17h event renumberings in amd_pmc_perf_hw_id Zen renumbered some of the performance counters that correspond to the well known events in perf_hw_id. This code in KVM was never updated for that, so guest that attempt to use counters on Zen that correspond to the pre-Zen perf_hw_id values will silently receive the wrong values. This has been observed in the wild with rr[0] when running in Zen 3 guests. rr uses the retired conditional branch counter 00d1 which is incorrectly recognized by KVM as PERF_COUNT_HW_STALLED_CYCLES_BACKEND. [0] https://rr-project.org/ Signed-off-by: Kyle Huey Message-Id: <20220503050136.86298-1-khuey@kylehuey.com> Cc: stable@vger.kernel.org [Check guest family, not host. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/pmu.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 24eb935b6f85c..311cbaa0c3ddf 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -45,6 +45,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, }; +/* duplicated from amd_f17h_perfmon_event_map. */ +static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = { + [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, + [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, + [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES }, + [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, + [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + +/* amd_pmc_perf_hw_id depends on these being the same size */ +static_assert(ARRAY_SIZE(amd_event_mapping) == + ARRAY_SIZE(amd_f17h_event_mapping)); + static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); @@ -140,6 +156,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) { + struct kvm_event_hw_type_mapping *event_mapping; u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; @@ -148,15 +165,20 @@ static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) if (WARN_ON(pmc_is_fixed(pmc))) return PERF_COUNT_HW_MAX; + if (guest_cpuid_family(pmc->vcpu) >= 0x17) + event_mapping = amd_f17h_event_mapping; + else + event_mapping = amd_event_mapping; + for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++) - if (amd_event_mapping[i].eventsel == event_select - && amd_event_mapping[i].unit_mask == unit_mask) + if (event_mapping[i].eventsel == event_select + && event_mapping[i].unit_mask == unit_mask) break; if (i == ARRAY_SIZE(amd_event_mapping)) return PERF_COUNT_HW_MAX; - return amd_event_mapping[i].event_type; + return event_mapping[i].event_type; } /* check if a PMC is enabled by comparing it against global_ctrl bits. Because From 5a1bde46f98b893cda6122b00e94c0c40a6ead3c Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 27 Apr 2022 17:01:49 +0530 Subject: [PATCH 131/491] kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural PMU On some x86 processors, CPUID leaf 0xA provides information on Architectural Performance Monitoring features. It advertises a PMU version which Qemu uses to determine the availability of additional MSRs to manage the PMCs. Upon receiving a KVM_GET_SUPPORTED_CPUID ioctl request for the same, the kernel constructs return values based on the x86_pmu_capability irrespective of the vendor. This leaf and the additional MSRs are not supported on AMD and Hygon processors. If AMD PerfMonV2 is detected, the PMU version is set to 2 and guest startup breaks because of an attempt to access a non-existent MSR. Return zeros to avoid this. Fixes: a6c06ed1a60a ("KVM: Expose the architectural performance monitoring CPUID leaf") Reported-by: Vasant Hegde Signed-off-by: Sandipan Das Message-Id: <3fef83d9c2b2f7516e8ff50d60851f29a4bcb716.1651058600.git.sandipan.das@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b24ca7f4ed7c8..732724ea5b100 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -887,6 +887,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) union cpuid10_eax eax; union cpuid10_edx edx; + if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + perf_get_x86_pmu_capability(&cap); /* From a06afe8383080c630a7a528b8382fc6bb4925b61 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 29 Apr 2022 17:15:26 +0200 Subject: [PATCH 132/491] KVM: s390: vsie/gmap: reduce gmap_rmap overhead there are cases that trigger a 2nd shadow event for the same vmaddr/raddr combination. (prefix changes, reboots, some known races) This will increase memory usages and it will result in long latencies when cleaning up, e.g. on shutdown. To avoid cases with a list that has hundreds of identical raddrs we check existing entries at insert time. As this measurably reduces the list length this will be faster than traversing the list at shutdown time. In the long run several places will be optimized to create less entries and a shrinker might be necessary. Fixes: 4be130a08420 ("s390/mm: add shadow gmap support") Signed-off-by: Christian Borntraeger Acked-by: David Hildenbrand Link: https://lore.kernel.org/r/20220429151526.1560-1-borntraeger@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index af03cacf34ec7..1ac73917a8d39 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1183,6 +1183,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table); static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, struct gmap_rmap *rmap) { + struct gmap_rmap *temp; void __rcu **slot; BUG_ON(!gmap_is_shadow(sg)); @@ -1190,6 +1191,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, if (slot) { rmap->next = radix_tree_deref_slot_protected(slot, &sg->guest_table_lock); + for (temp = rmap->next; temp; temp = temp->next) { + if (temp->raddr == rmap->raddr) { + kfree(rmap); + return; + } + } radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap); } else { rmap->next = NULL; From 3481551f035725fdc46885425eac3ef9b58ae7b7 Mon Sep 17 00:00:00 2001 From: Camel Guo Date: Tue, 3 May 2022 13:43:33 +0200 Subject: [PATCH 133/491] hwmon: (tmp401) Add OF device ID table This driver doesn't have of_match_table. This makes the kernel module tmp401.ko lack alias patterns (e.g: of:N*T*Cti,tmp411) to match DT node of the supported devices hence this kernel module will not be automatically loaded. After adding of_match_table to this driver, the folllowing alias will be added into tmp401.ko. $ modinfo drivers/hwmon/tmp401.ko filename: drivers/hwmon/tmp401.ko ...... author: Hans de Goede alias: of:N*T*Cti,tmp435C* alias: of:N*T*Cti,tmp435 alias: of:N*T*Cti,tmp432C* alias: of:N*T*Cti,tmp432 alias: of:N*T*Cti,tmp431C* alias: of:N*T*Cti,tmp431 alias: of:N*T*Cti,tmp411C* alias: of:N*T*Cti,tmp411 alias: of:N*T*Cti,tmp401C* alias: of:N*T*Cti,tmp401 ...... Fixes: af503716ac14 ("i2c: core: report OF style module alias for devices registered via OF") Signed-off-by: Camel Guo Link: https://lore.kernel.org/r/20220503114333.456476-1-camel.guo@axis.com Signed-off-by: Guenter Roeck --- drivers/hwmon/tmp401.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index b86d9df7105d1..52c9e7d3f2ae7 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -708,10 +708,21 @@ static int tmp401_probe(struct i2c_client *client) return 0; } +static const struct of_device_id __maybe_unused tmp4xx_of_match[] = { + { .compatible = "ti,tmp401", }, + { .compatible = "ti,tmp411", }, + { .compatible = "ti,tmp431", }, + { .compatible = "ti,tmp432", }, + { .compatible = "ti,tmp435", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp4xx_of_match); + static struct i2c_driver tmp401_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "tmp401", + .of_match_table = of_match_ptr(tmp4xx_of_match), }, .probe_new = tmp401_probe, .id_table = tmp401_id, From aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 May 2022 15:50:14 +0200 Subject: [PATCH 134/491] fbdev: Make fb_release() return -ENODEV if fbdev was unregistered A reference to the framebuffer device struct fb_info is stored in the file private data, but this reference could no longer be valid and must not be accessed directly. Instead, the file_fb_info() accessor function must be used since it does sanity checking to make sure that the fb_info is valid. This can happen for example if the registered framebuffer device is for a driver that just uses a framebuffer provided by the system firmware. In that case, the fbdev core would unregister the framebuffer device when a real video driver is probed and ask to remove conflicting framebuffers. The bug has been present for a long time but commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") unmasked it since the fbdev core started unregistering the framebuffers' devices associated. Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Reported-by: Maxime Ripard Reported-by: Junxiao Chang Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220502135014.377945-1-javierm@redhat.com --- drivers/video/fbdev/core/fbmem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index a6bb0e4382167..97eb0dee411cf 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1434,7 +1434,10 @@ fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { - struct fb_info * const info = file->private_data; + struct fb_info * const info = file_fb_info(file); + + if (!info) + return -ENODEV; lock_fb_info(info); if (info->fbops->fb_release) From 20ce30fb4750f2ffc130cdcb26232b1dd87cd0a5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 26 Apr 2022 18:32:26 -0700 Subject: [PATCH 135/491] interconnect: Restore sync state by ignoring ipa-virt in provider count Ignore compatible strings for the IPA virt drivers that were removed in commits 2fb251c26560 ("interconnect: qcom: sdx55: Drop IP0 interconnects") and 2f3724930eb4 ("interconnect: qcom: sc7180: Drop IP0 interconnects") so that the sync state logic can kick in again. Otherwise all the interconnects in the system will stay pegged at max speeds because 'providers_count' is always going to be one larger than the number of drivers that will ever probe on sc7180 or sdx55. This fixes suspend on sc7180 and sdx55 devices when you don't have a devicetree patch to remove the ipa-virt compatible node. Cc: Bjorn Andersson Cc: Doug Anderson Cc: Alex Elder Cc: Taniya Das Cc: Mike Tipton Fixes: 2fb251c26560 ("interconnect: qcom: sdx55: Drop IP0 interconnects") Fixes: 2f3724930eb4 ("interconnect: qcom: sc7180: Drop IP0 interconnects") Signed-off-by: Stephen Boyd Reviewed-by: Alex Elder Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20220427013226.341209-1-swboyd@chromium.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 9050ca1f4285c..808f6e7a80482 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1087,9 +1087,15 @@ static int of_count_icc_providers(struct device_node *np) { struct device_node *child; int count = 0; + const struct of_device_id __maybe_unused ignore_list[] = { + { .compatible = "qcom,sc7180-ipa-virt" }, + { .compatible = "qcom,sdx55-ipa-virt" }, + {} + }; for_each_available_child_of_node(np, child) { - if (of_property_read_bool(child, "#interconnect-cells")) + if (of_property_read_bool(child, "#interconnect-cells") && + likely(!of_match_node(ignore_list, child))) count++; count += of_count_icc_providers(child); } From 134b5ce3ed33d3857d5d6e1edcd1656ed9364bbf Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 30 Mar 2022 18:34:15 -0700 Subject: [PATCH 136/491] PCI: qcom: Remove ddrss_sf_tbu clock from SC8180X The Qualcomm SC8180X platform was piggy-backing on the SM8250 qcom_pcie_cfg, but SC8180X doesn't have the ddrss_sf_tbu clock, so it now fails to probe due to the missing clock. Give SC8180X its own qcom_pcie_cfg, without the ddrss_sf_tbu flag set. Fixes: 0614f98bbb9f ("PCI: qcom: Add ddrss_sf_tbu flag") Link: https://lore.kernel.org/r/20220331013415.592748-1-bjorn.andersson@linaro.org Tested-by: Steev Klimaszewski Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Helgaas Reviewed-by: Dmitry Baryshkov Acked-by: Stanimir Varbanov --- drivers/pci/controller/dwc/pcie-qcom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 6ab90891801d8..816028c0f6edb 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1550,6 +1550,11 @@ static const struct qcom_pcie_cfg sc7280_cfg = { .pipe_clk_need_muxing = true, }; +static const struct qcom_pcie_cfg sc8180x_cfg = { + .ops = &ops_1_9_0, + .has_tbu_clk = true, +}; + static const struct dw_pcie_ops dw_pcie_ops = { .link_up = qcom_pcie_link_up, .start_link = qcom_pcie_start_link, @@ -1656,7 +1661,7 @@ static const struct of_device_id qcom_pcie_match[] = { { .compatible = "qcom,pcie-qcs404", .data = &ipq4019_cfg }, { .compatible = "qcom,pcie-sdm845", .data = &sdm845_cfg }, { .compatible = "qcom,pcie-sm8250", .data = &sm8250_cfg }, - { .compatible = "qcom,pcie-sc8180x", .data = &sm8250_cfg }, + { .compatible = "qcom,pcie-sc8180x", .data = &sc8180x_cfg }, { .compatible = "qcom,pcie-sm8450-pcie0", .data = &sm8450_pcie0_cfg }, { .compatible = "qcom,pcie-sm8450-pcie1", .data = &sm8450_pcie1_cfg }, { .compatible = "qcom,pcie-sc7280", .data = &sc7280_cfg }, From 2069624dac19d62c558bb6468fe03678553ab01d Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Mon, 2 May 2022 23:33:15 +0100 Subject: [PATCH 137/491] net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT As noted elsewhere, various GPON SFP modules exhibit non-standard TX-fault behaviour. In the tested case, the Huawei MA5671A, when used in combination with a Marvell mv88e6085 switch, was found to persistently assert TX-fault, resulting in the module being disabled. This patch adds a quirk to ignore the SFP_F_TX_FAULT state, allowing the module to function. Change from v1: removal of erroneous return statment (Andrew Lunn) Signed-off-by: Matthew Hagan Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220502223315.1973376-1-mnhagan88@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4dfb79807823d..9a5d5a10560fb 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -250,6 +250,7 @@ struct sfp { struct sfp_eeprom_id id; unsigned int module_power_mW; unsigned int module_t_start_up; + bool tx_fault_ignore; #if IS_ENABLED(CONFIG_HWMON) struct sfp_diag diag; @@ -1956,6 +1957,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) else sfp->module_t_start_up = T_START_UP; + if (!memcmp(id.base.vendor_name, "HUAWEI ", 16) && + !memcmp(id.base.vendor_pn, "MA5671A ", 16)) + sfp->tx_fault_ignore = true; + else + sfp->tx_fault_ignore = false; + return 0; } @@ -2409,7 +2416,10 @@ static void sfp_check_state(struct sfp *sfp) mutex_lock(&sfp->st_mutex); state = sfp_get_state(sfp); changed = state ^ sfp->state; - changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; + if (sfp->tx_fault_ignore) + changed &= SFP_F_PRESENT | SFP_F_LOS; + else + changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; for (i = 0; i < GPIO_MAX; i++) if (changed & BIT(i)) From 5ef9b803a4af0f5e42012176889b40bb2a978b18 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Mon, 2 May 2022 23:14:09 +0300 Subject: [PATCH 138/491] smsc911x: allow using IRQ0 The AlphaProject AP-SH4A-3A/AP-SH4AD-0A SH boards use IRQ0 for their SMSC LAN911x Ethernet chip, so the networking on them must have been broken by commit 965b2aa78fbc ("net/smsc911x: fix irq resource allocation failure") which filtered out 0 as well as the negative error codes -- it was kinda correct at the time, as platform_get_irq() could return 0 on of_irq_get() failure and on the actual 0 in an IRQ resource. This issue was fixed by me (back in 2016!), so we should be able to fix this driver to allow IRQ0 usage again... When merging this to the stable kernels, make sure you also merge commit e330b9a6bb35 ("platform: don't return 0 from platform_get_irq[_byname]() on error") -- that's my fix to platform_get_irq() for the DT platforms... Fixes: 965b2aa78fbc ("net/smsc911x: fix irq resource allocation failure") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/656036e4-6387-38df-b8a7-6ba683b16e63@omp.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smsc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 7a50ba00f8ae3..c854efdf1f25f 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2431,7 +2431,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) if (irq == -EPROBE_DEFER) { retval = -EPROBE_DEFER; goto out_0; - } else if (irq <= 0) { + } else if (irq < 0) { pr_warn("Could not allocate irq resource\n"); retval = -ENODEV; goto out_0; From 13ba794397e45e52893cfc21d7a69cb5f341b407 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 2 May 2022 21:13:10 -0400 Subject: [PATCH 139/491] bnxt_en: Fix possible bnxt_open() failure caused by wrong RFS flag bnxt_open() can fail in this code path, especially on a VF when it fails to reserve default rings: bnxt_open() __bnxt_open_nic() bnxt_clear_int_mode() bnxt_init_dflt_ring_mode() RX rings would be set to 0 when we hit this error path. It is possible for a subsequent bnxt_open() call to potentially succeed with a code path like this: bnxt_open() bnxt_hwrm_if_change() bnxt_fw_init_one() bnxt_fw_init_one_p3() bnxt_set_dflt_rfs() bnxt_rfs_capable() bnxt_hwrm_reserve_rings() On older chips, RFS is capable if we can reserve the number of vnics that is equal to RX rings + 1. But since RX rings is still set to 0 in this code path, we may mistakenly think that RFS is supported for 0 RX rings. Later, when the default RX rings are reserved and we try to enable RFS, it would fail and cause bnxt_open() to fail unnecessarily. We fix this in 2 places. bnxt_rfs_capable() will always return false if RX rings is not yet set. bnxt_init_dflt_ring_mode() will call bnxt_set_dflt_rfs() which will always clear the RFS flags if RFS is not supported. Fixes: 20d7d1c5c9b1 ("bnxt_en: reliably allocate IRQ table on reset to avoid crash") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 874fad0a5cf8f..2818cfef42f81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10983,7 +10983,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5) return bnxt_rfs_supported(bp); - if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp)) + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; vnics = 1 + bp->rx_nr_rings; @@ -13234,10 +13234,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) goto init_dflt_ring_err; bp->tx_nr_rings_per_tc = bp->tx_nr_rings; - if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) { - bp->flags |= BNXT_FLAG_RFS; - bp->dev->features |= NETIF_F_NTUPLE; - } + + bnxt_set_dflt_rfs(bp); + init_dflt_ring_err: bnxt_ulp_irq_restart(bp, rc); return rc; From 2b156fb57d8f0d28f2207edc646751f4717cf20d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 2 May 2022 21:13:11 -0400 Subject: [PATCH 140/491] bnxt_en: Initiallize bp->ptp_lock first before using it bnxt_ptp_init() calls bnxt_ptp_init_rtc() which will acquire the ptp_lock spinlock. The spinlock is not initialized until later. Move the bnxt_ptp_init_rtc() call after the spinlock is initialized. Fixes: 24ac1ecd5240 ("bnxt_en: Add driver support to use Real Time Counter for PTP") Reviewed-by: Pavan Chebbi Reviewed-by: Saravanan Vajravel Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Reviewed-by: Damodharam Ammepalli Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 9c2ad5e67a5d8..00f2f80c00733 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -846,13 +846,6 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) if (rc) return rc; - if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { - bnxt_ptp_timecounter_init(bp, false); - rc = bnxt_ptp_init_rtc(bp, phc_cfg); - if (rc) - goto out; - } - if (ptp->ptp_clock && bnxt_pps_config_ok(bp)) return 0; @@ -861,8 +854,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); - if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + bnxt_ptp_timecounter_init(bp, false); + rc = bnxt_ptp_init_rtc(bp, phc_cfg); + if (rc) + goto out; + } else { bnxt_ptp_timecounter_init(bp, true); + } ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { From 195af57914d15229186658ed26dab24b9ada4122 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 2 May 2022 21:13:12 -0400 Subject: [PATCH 141/491] bnxt_en: Fix unnecessary dropping of RX packets In bnxt_poll_p5(), we first check cpr->has_more_work. If it is true, we are in NAPI polling mode and we will call __bnxt_poll_cqs() to continue polling. It is possible to exhanust the budget again when __bnxt_poll_cqs() returns. We then enter the main while loop to check for new entries in the NQ. If we had previously exhausted the NAPI budget, we may call __bnxt_poll_work() to process an RX entry with zero budget. This will cause packets to be dropped unnecessarily, thinking that we are in the netpoll path. Fix it by breaking out of the while loop if we need to process an RX NQ entry with no budget left. We will then exit NAPI and stay in polling mode. Fixes: 389a877a3b20 ("bnxt_en: Process the NQ under NAPI continuous polling.") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2818cfef42f81..1d69fe0737a1c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2707,6 +2707,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) u32 idx = le32_to_cpu(nqcmp->cq_handle_low); struct bnxt_cp_ring_info *cpr2; + /* No more budget for RX work */ + if (budget && work_done >= budget && idx == BNXT_RX_HDL) + break; + cpr2 = cpr->cp_ring_arr[idx]; work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); From cb0d54cbf94866b48a73e10a73a55655f808cc7c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 15 Mar 2022 18:20:48 +0200 Subject: [PATCH 142/491] net/mlx5e: Fix wrong source vport matching on tunnel rule When OVS internal port is the vtep device, the first decap rule is matching on the internal port's vport metadata value and then changes the metadata to be the uplink's value. Therefore, following rules on the tunnel, in chain > 0, should avoid matching on internal port metadata and use the uplink vport metadata instead. Select the uplink's metadata value for the source vport match in case the rule is in chain greater than zero, even if the tunnel route device is internal port. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3f63df1270912..3b151332e2f89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -139,7 +139,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (esw_attr->int_port) + if (attr && !attr->chain && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else From 7ba2d9d8de96696c1451fee1b01da11f45bdc2b9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 3 Mar 2022 19:02:03 +0200 Subject: [PATCH 143/491] net/mlx5: Fix slab-out-of-bounds while reading resource dump menu Resource dump menu may span over more than a single page, support it. Otherwise, menu read may result in a memory access violation: reading outside of the allocated page. Note that page format of the first menu page contains menu headers while the proceeding menu pages contain only records. The KASAN logs are as follows: BUG: KASAN: slab-out-of-bounds in strcmp+0x9b/0xb0 Read of size 1 at addr ffff88812b2e1fd0 by task systemd-udevd/496 CPU: 5 PID: 496 Comm: systemd-udevd Tainted: G B 5.16.0_for_upstream_debug_2022_01_10_23_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1f/0x140 ? strcmp+0x9b/0xb0 ? strcmp+0x9b/0xb0 kasan_report.cold+0x83/0xdf ? strcmp+0x9b/0xb0 strcmp+0x9b/0xb0 mlx5_rsc_dump_init+0x4ab/0x780 [mlx5_core] ? mlx5_rsc_dump_destroy+0x80/0x80 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x286/0x400 ? raw_spin_unlock_irqrestore+0x47/0x50 ? aomic_notifier_chain_register+0x32/0x40 mlx5_load+0x104/0x2e0 [mlx5_core] mlx5_init_one+0x41b/0x610 [mlx5_core] .... The buggy address belongs to the object at ffff88812b2e0000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 4048 bytes to the right of 4096-byte region [ffff88812b2e0000, ffff88812b2e1000) The buggy address belongs to the page: page:000000009d69807a refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88812b2e6000 pfn:0x12b2e0 head:000000009d69807a order:3 compound_mapcount:0 compound_pincount:0 flags: 0x8000000000010200(slab|head|zone=2) raw: 8000000000010200 0000000000000000 dead000000000001 ffff888100043040 raw: ffff88812b2e6000 0000000080040000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88812b2e1e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88812b2e1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88812b2e1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88812b2e2000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812b2e2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 12206b17235a ("net/mlx5: Add support for resource dump") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/diag/rsc_dump.c | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 538adab6878b5..c5b560a8b026e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = { struct mlx5_rsc_dump { u32 pdn; u32 mkey; + u32 number_of_menu_items; u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; }; @@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name) return -EINVAL; } -static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) { void *data = page_address(page); enum mlx5_sgmt_type sgmt_idx; int num_of_items; char *sgmt_name; void *member; + int size = 0; void *menu; int i; - menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); - num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; + + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; - for (i = 0; i < num_of_items; i++) { - member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); if (sgmt_idx == -EINVAL) @@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, member, segment_type); } + return 0; } static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, @@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) struct mlx5_rsc_dump_cmd *cmd = NULL; struct mlx5_rsc_key key = {}; struct page *page; + int start_idx = 0; int size; int err; @@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) if (err < 0) goto destroy_cmd; - mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); } while (err > 0); From ada09af92e621ab500dd80a16d1d0299a18a1180 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 28 Mar 2022 15:54:52 +0300 Subject: [PATCH 144/491] net/mlx5e: Don't match double-vlan packets if cvlan is not set Currently, match VLAN rule also matches packets that have multiple VLAN headers. This behavior is similar to buggy flower classifier behavior that has recently been fixed. Fix the issue by matching on outer_second_cvlan_tag with value 0 which will cause the HW to verify the packet doesn't contain second vlan header. Fixes: 699e96ddf47f ("net/mlx5e: Support offloading tc double vlan headers match") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3fc15ae7bb10..ac0f73074f7ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2459,6 +2459,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match.key->vlan_priority); *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } } } else if (*match_level != MLX5_MATCH_NONE) { /* cvlan_tag enabled in match criteria and From c4d963a588a6e7c4ef31160e80697ae8e5a47746 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 6 Apr 2022 10:30:21 +0300 Subject: [PATCH 145/491] net/mlx5e: Fix the calling of update_buffer_lossy() API The arguments of update_buffer_lossy() is in a wrong order. Fix it. Fixes: 88b3d5c90e96 ("net/mlx5e: Fix port buffers cell size value") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 673f1c82d3815..c9d5d8d93994d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, - xoff, &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; } From 27b0420fd959e38e3500e60b637d39dfab065645 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:19 +0300 Subject: [PATCH 146/491] net/mlx5e: Lag, Fix use-after-free in fib event handler Recent commit that modified fib route event handler to handle events according to their priority introduced use-after-free[0] in mp->mfi pointer usage. The pointer now is not just cached in order to be compared to following fib_info instances, but is also dereferenced to obtain fib_priority. However, since mlx5 lag code doesn't hold the reference to fin_info during whole mp->mfi lifetime, it could be used after fib_info instance has already been freed be kernel infrastructure code. Don't ever dereference mp->mfi pointer. Refactor it to be 'const void*' type and cache fib_info priority in dedicated integer. Group fib_info-related data into dedicated 'fib' structure that will be further extended by following patches in the series. [0]: [ 203.588029] ================================================================== [ 203.590161] BUG: KASAN: use-after-free in mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.592386] Read of size 4 at addr ffff888144df2050 by task kworker/u20:4/138 [ 203.594766] CPU: 3 PID: 138 Comm: kworker/u20:4 Tainted: G B 5.17.0-rc7+ #6 [ 203.596751] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 203.598813] Workqueue: mlx5_lag_mp mlx5_lag_fib_update [mlx5_core] [ 203.600053] Call Trace: [ 203.600608] [ 203.601110] dump_stack_lvl+0x48/0x5e [ 203.601860] print_address_description.constprop.0+0x1f/0x160 [ 203.602950] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.604073] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.605177] kasan_report.cold+0x83/0xdf [ 203.605969] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.607102] mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.608199] ? mlx5_lag_init_fib_work+0x1c0/0x1c0 [mlx5_core] [ 203.609382] ? read_word_at_a_time+0xe/0x20 [ 203.610463] ? strscpy+0xa0/0x2a0 [ 203.611463] process_one_work+0x722/0x1270 [ 203.612344] worker_thread+0x540/0x11e0 [ 203.613136] ? rescuer_thread+0xd50/0xd50 [ 203.613949] kthread+0x26e/0x300 [ 203.614627] ? kthread_complete_and_exit+0x20/0x20 [ 203.615542] ret_from_fork+0x1f/0x30 [ 203.616273] [ 203.617174] Allocated by task 3746: [ 203.617874] kasan_save_stack+0x1e/0x40 [ 203.618644] __kasan_kmalloc+0x81/0xa0 [ 203.619394] fib_create_info+0xb41/0x3c50 [ 203.620213] fib_table_insert+0x190/0x1ff0 [ 203.621020] fib_magic.isra.0+0x246/0x2e0 [ 203.621803] fib_add_ifaddr+0x19f/0x670 [ 203.622563] fib_inetaddr_event+0x13f/0x270 [ 203.623377] blocking_notifier_call_chain+0xd4/0x130 [ 203.624355] __inet_insert_ifa+0x641/0xb20 [ 203.625185] inet_rtm_newaddr+0xc3d/0x16a0 [ 203.626009] rtnetlink_rcv_msg+0x309/0x880 [ 203.626826] netlink_rcv_skb+0x11d/0x340 [ 203.627626] netlink_unicast+0x4cc/0x790 [ 203.628430] netlink_sendmsg+0x762/0xc00 [ 203.629230] sock_sendmsg+0xb2/0xe0 [ 203.629955] ____sys_sendmsg+0x58a/0x770 [ 203.630756] ___sys_sendmsg+0xd8/0x160 [ 203.631523] __sys_sendmsg+0xb7/0x140 [ 203.632294] do_syscall_64+0x35/0x80 [ 203.633045] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.634427] Freed by task 0: [ 203.635063] kasan_save_stack+0x1e/0x40 [ 203.635844] kasan_set_track+0x21/0x30 [ 203.636618] kasan_set_free_info+0x20/0x30 [ 203.637450] __kasan_slab_free+0xfc/0x140 [ 203.638271] kfree+0x94/0x3b0 [ 203.638903] rcu_core+0x5e4/0x1990 [ 203.639640] __do_softirq+0x1ba/0x5d3 [ 203.640828] Last potentially related work creation: [ 203.641785] kasan_save_stack+0x1e/0x40 [ 203.642571] __kasan_record_aux_stack+0x9f/0xb0 [ 203.643478] call_rcu+0x88/0x9c0 [ 203.644178] fib_release_info+0x539/0x750 [ 203.644997] fib_table_delete+0x659/0xb80 [ 203.645809] fib_magic.isra.0+0x1a3/0x2e0 [ 203.646617] fib_del_ifaddr+0x93f/0x1300 [ 203.647415] fib_inetaddr_event+0x9f/0x270 [ 203.648251] blocking_notifier_call_chain+0xd4/0x130 [ 203.649225] __inet_del_ifa+0x474/0xc10 [ 203.650016] devinet_ioctl+0x781/0x17f0 [ 203.650788] inet_ioctl+0x1ad/0x290 [ 203.651533] sock_do_ioctl+0xce/0x1c0 [ 203.652315] sock_ioctl+0x27b/0x4f0 [ 203.653058] __x64_sys_ioctl+0x124/0x190 [ 203.653850] do_syscall_64+0x35/0x80 [ 203.654608] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.666952] The buggy address belongs to the object at ffff888144df2000 which belongs to the cache kmalloc-256 of size 256 [ 203.669250] The buggy address is located 80 bytes inside of 256-byte region [ffff888144df2000, ffff888144df2100) [ 203.671332] The buggy address belongs to the page: [ 203.672273] page:00000000bf6c9314 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x144df0 [ 203.674009] head:00000000bf6c9314 order:2 compound_mapcount:0 compound_pincount:0 [ 203.675422] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) [ 203.676819] raw: 002ffff800010200 0000000000000000 dead000000000122 ffff888100042b40 [ 203.678384] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 203.679928] page dumped because: kasan: bad access detected [ 203.681455] Memory state around the buggy address: [ 203.682421] ffff888144df1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.683863] ffff888144df1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.685310] >ffff888144df2000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.686701] ^ [ 203.687820] ffff888144df2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.689226] ffff888144df2100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.690620] ================================================================== Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 26 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/lag/mp.h | 5 +++- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 4a6ec15ef0460..bc77aba97ac1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,6 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; +} + struct mlx5_fib_event_work { struct work_struct work; struct mlx5_lag *ldev; @@ -121,13 +127,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { /* stop track */ - if (mp->mfi == fi) - mp->mfi = NULL; + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; return; } /* Handle multipath entry with lower priority value */ - if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority) + if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -145,7 +151,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); return; } @@ -165,7 +171,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* First time we see multipath route */ - if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { struct lag_tracker tracker; tracker = ldev->tracker; @@ -173,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -184,7 +190,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct lag_mp *mp = &ldev->lag_mp; /* Check the nh event is related to the route */ - if (!mp->mfi || mp->mfi != fi) + if (!mp->fib.mfi || mp->fib.mfi != fi) return; /* nh added/removed */ @@ -313,7 +319,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev) /* Clear mfi, as it might become stale when a route delete event * has been missed, see mlx5_lag_fib_route_event(). */ - ldev->lag_mp.mfi = NULL; + ldev->lag_mp.fib.mfi = NULL; } int mlx5_lag_mp_init(struct mlx5_lag *ldev) @@ -324,7 +330,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) /* always clear mfi, as it might become stale when a route delete event * has been missed */ - mp->mfi = NULL; + mp->fib.mfi = NULL; if (mp->fib_nb.notifier_call) return 0; @@ -354,5 +360,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; - mp->mfi = NULL; + mp->fib.mfi = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 57af962cad298..143226753c3a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -15,7 +15,10 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; - struct fib_info *mfi; /* used in tracking fib events */ + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + } fib; struct workqueue_struct *wq; }; From a6589155ec9847918e00e7279b8aa6d4c272bea7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:54 +0300 Subject: [PATCH 147/491] net/mlx5e: Lag, Fix fib_info pointer assignment Referenced change incorrectly sets single path fib_info even when LAG is not active. Fix it by moving call to mlx5_lag_fib_set() into conditional that verifies LAG state. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bc77aba97ac1d..9a5884e8a8bf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -149,9 +149,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi); } - mlx5_lag_fib_set(mp, fi); return; } From 4a2a664ed87962c4ddb806a84b5c9634820bcf55 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:40:37 +0300 Subject: [PATCH 148/491] net/mlx5e: Lag, Don't skip fib events on current dst Referenced change added check to skip updating fib when new fib instance has same or lower priority. However, new fib instance can be an update on same dst address as existing one even though the structure is another instance that has different address. Ignoring events on such instances causes multipath LAG state to not be correctly updated. Track 'dst' and 'dst_len' fields of fib event fib_entry_notifier_info structure and don't skip events that have the same value of that fields. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 20 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/lag/mp.h | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 9a5884e8a8bf5..d6c3e6dfd71fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,10 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } -static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) { mp->fib.mfi = fi; mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; } struct mlx5_fib_event_work { @@ -116,10 +118,10 @@ struct mlx5_fib_event_work { }; }; -static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, - unsigned long event, - struct fib_info *fi) +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) { + struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; struct fib_nh *fib_nh0, *fib_nh1; unsigned int nhs; @@ -133,7 +135,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle multipath entry with lower priority value */ - if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -149,7 +153,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } return; @@ -179,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -220,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work) case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, - fib_work->fen_info.fi); + &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 143226753c3a9..056a066da604b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -18,6 +18,8 @@ struct lag_mp { struct { const void *mfi; /* used in tracking fib events */ u32 priority; + u32 dst; + int dst_len; } fib; struct workqueue_struct *wq; }; From 087032ee7021a22e4c7557c0ed16bfd792c3f6fe Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 23 Feb 2022 21:29:17 +0200 Subject: [PATCH 149/491] net/mlx5e: TC, Fix ct_clear overwriting ct action metadata ct_clear action is translated to clearing reg_c metadata which holds ct state and zone information using mod header actions. These actions are allocated during the actions parsing, as part of the flow attributes main mod header action list. If ct action exists in the rule, the flow's main mod header is used only in the post action table rule, after the ct tables which set the ct info in the reg_c as part of the ct actions. Therefore, if the original rule has a ct_clear action followed by a ct action, the ct action reg_c setting will be done first and will be followed by the ct_clear resetting reg_c and overwriting the ct info. Fix this by moving the ct_clear mod header actions allocation from the ct action parsing stage to the ct action post parsing stage where it is already known if ct_clear is followed by a ct action. In such case, we skip the mod header actions allocation for the ct clear since the ct action will write to reg_c anyway after clearing it. Fixes: 806401c20a0f ("net/mlx5e: CT, Fix multiple allocations and memleak of mod acts") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/act/ct.c | 34 +++++++++++++++++-- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 ++++------- .../ethernet/mellanox/mlx5/core/en/tc_ct.h | 11 ++++++ 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index b9d38fe807df5..a829c94289c10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -45,12 +45,41 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; - if (!clear_action) { + if (clear_action) { + parse_state->ct_clear = true; + } else { attr->flags |= MLX5_ATTR_FLAG_CT; flow_flag_set(parse_state->flow, CT); parse_state->ct = true; } - parse_state->ct_clear = clear_action; + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; + int err; + + /* If ct action exist, we can ignore previous ct_clear actions */ + if (parse_state->ct) + return 0; + + if (parse_state->ct_clear) { + err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Prevent handling of additional, redundant clear actions */ + parse_state->ct_clear = false; + } return 0; } @@ -70,5 +99,6 @@ struct mlx5e_tc_act mlx5e_tc_act_ct = { .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, .is_multi_table_act = tc_act_is_multi_table_act_ct, + .post_parse = tc_act_post_parse_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e49f51124c749..73a1e0a4818d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -582,6 +582,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return 0; } +int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); +} + static int mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, char *modact) @@ -1410,9 +1416,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; - int err; - if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1423,17 +1426,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; - if (!clear_action) - goto out; - - err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); - return err; - } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - -out: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 36d3652bf8297..00a3ba862afb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -129,6 +129,10 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); +int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -170,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) return 0; } +static inline int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return -EOPNOTSUPP; +} + static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, From e3fdc71bcb6ffe1d4870a89252ba296a9558e294 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 25 Apr 2022 17:12:12 +0300 Subject: [PATCH 150/491] net/mlx5e: TC, fix decap fallback to uplink when int port not supported When resolving the decap route device for a tunnel decap rule, the result may be an OVS internal port device. Prior to adding the support for internal port offload, such case would result in using the uplink as the default decap route device which allowed devices that can't support internal port offload to offload this decap rule. This behavior got broken by adding the internal port offload which will fail in case the device can't support internal port offload. To restore the old behavior, use the uplink device as the decap route as before when internal port offload is not supported. Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 378fc8e3bd975..d87bbb0be7c86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -713,6 +713,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct net_device *filter_dev) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_int_port *int_port; TC_TUN_ROUTE_ATTR_INIT(attr); u16 vport_num; @@ -747,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni); esw_attr->rx_tun_attr->decap_vport = vport_num; - } else if (netif_is_ovs_master(attr.route_dev)) { + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), attr.route_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS); From b069e14fff46c8da9fcc79957f8acaa3e2dfdb6b Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 29 Mar 2022 17:42:46 +0300 Subject: [PATCH 151/491] net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release __mlx5_tc_ct_entry_put() queues release of tuple related to some ct FT, if that is the last reference to that tuple, the actual deletion of the tuple can happen after the FT is already destroyed and freed. Flush the used workqueue before destroying the ct FT. Fixes: a2173131526d ("net/mlx5e: CT: manage the lifetime of the ct entry object") Reviewed-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 73a1e0a4818d2..ab4b0f3ee2a0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1741,6 +1741,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { + struct mlx5e_priv *priv; + if (!refcount_dec_and_test(&ft->refcount)) return; @@ -1750,6 +1752,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + priv = netdev_priv(ct_priv->netdev); + flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); From 0e322efd64d2ad11a773f7a314512086ebbe000c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 28 Mar 2022 18:29:13 +0300 Subject: [PATCH 152/491] net/mlx5e: Avoid checking offload capability in post_parse action During TC action parsing, the can_offload callback is called before calling the action's main parsing callback. Later on, the can_offload callback is called again before handling the action's post_parse callback if exists. Since the main parsing callback might have changed and set parsing params for the rule, following can_offload checks might fail because some parsing params were already set. Specifically, the ct action main parsing sets the ct param in the parsing status structure and when the second can_offload for ct action is called, before handling the ct post parsing, it will return an error since it checks this ct param to indicate multiple ct actions which are not supported. Therefore, the can_offload call is removed from the post parsing handling to prevent such cases. This is allowed since the first can_offload call will ensure that the action can be offloaded and the fact the code reached the post parsing handling already means that the action can be offloaded. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index af37a8d247a16..2755c25ba324a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -145,8 +145,7 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, flow_action_for_each(i, act, flow_action) { tc_act = mlx5e_tc_act_get(act->id, ns_type); - if (!tc_act || !tc_act->post_parse || - !tc_act->can_offload(parse_state, act, i, attr)) + if (!tc_act || !tc_act->post_parse) continue; err = tc_act->post_parse(parse_state, priv, attr); From b781bff882d16175277ca129c382886cb4c74a2c Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Wed, 9 Feb 2022 19:23:56 +0200 Subject: [PATCH 153/491] net/mlx5e: Fix trust state reset in reload Setting dscp2prio during the driver reload can cause dcb ieee app list to be not empty after the reload finish and as a result to a conflict between the priority trust state reported by the app and the state in the device register. Reset the dcb ieee app list on initialization in case this is conflicting with the register status. Fixes: 2a5e7a1344f4 ("net/mlx5e: Add dcbnl dscp to priority support") Signed-off-by: Moshe Tal Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d659fe07d4645..8ead2c82a52aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1200,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) return err; WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. + */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); From cb7786a76ea39f394f0a059787fe24fa8e340fb6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 21:31:06 +0300 Subject: [PATCH 154/491] net/mlx5: Fix deadlock in sync reset flow The sync reset flow can lead to the following deadlock when poll_sync_reset() is called by timer softirq and waiting on del_timer_sync() for the same timer. Fix that by moving the part of the flow that waits for the timer to reset_reload_work. It fixes the following kernel Trace: RIP: 0010:del_timer_sync+0x32/0x40 ... Call Trace: mlx5_sync_reset_clear_reset_requested+0x26/0x50 [mlx5_core] poll_sync_reset.cold+0x36/0x52 [mlx5_core] call_timer_fn+0x32/0x130 __run_timers.part.0+0x180/0x280 ? tick_sched_handle+0x33/0x60 ? tick_sched_timer+0x3d/0x80 ? ktime_get+0x3e/0xa0 run_timer_softirq+0x2a/0x50 __do_softirq+0xe1/0x2d6 ? hrtimer_interrupt+0x136/0x220 irq_exit+0xae/0xb0 smp_apic_timer_interrupt+0x7b/0x140 apic_timer_interrupt+0xf/0x20 Fixes: 3c5193a87b0f ("net/mlx5: Use del_timer_sync in fw reset flow of halting poll") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4aa22dce9b77f..ec18d4ccbc118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -155,22 +155,6 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) } } -static void mlx5_sync_reset_reload_work(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_reload_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); - err = mlx5_health_wait_pci_up(dev); - if (err) - mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); -} - static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; @@ -188,6 +172,23 @@ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, boo mlx5_start_health_poll(dev); } +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev); + err = mlx5_health_wait_pci_up(dev); + if (err) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + #define MLX5_RESET_POLL_INTERVAL (HZ / 10) static void poll_sync_reset(struct timer_list *t) { @@ -202,7 +203,6 @@ static void poll_sync_reset(struct timer_list *t) if (fatal_error) { mlx5_core_warn(dev, "Got Device Reset\n"); - mlx5_sync_reset_clear_reset_requested(dev, false); queue_work(fw_reset->wq, &fw_reset->reset_reload_work); return; } From fc3d3db07b35885f238e1fa06b9f04a8fa7a62d0 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 20:38:44 +0300 Subject: [PATCH 155/491] net/mlx5: Avoid double clear or set of sync reset requested Double clear of reset requested state can lead to NULL pointer as it will try to delete the timer twice. This can happen for example on a race between abort from FW and pci error or reset. Avoid such case using test_and_clear_bit() to verify only one time reset requested state clear flow. Similarly use test_and_set_bit() to verify only one time reset requested state set flow. Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ec18d4ccbc118..ca1aba845dd6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) del_timer_sync(&fw_reset->timer); } -static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return -EALREADY; + } + mlx5_stop_sync_reset_poll(dev); - clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); if (poll_health) mlx5_start_health_poll(dev); + return 0; } static void mlx5_sync_reset_reload_work(struct work_struct *work) @@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); } -static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } mlx5_stop_health_poll(dev, true); - set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); mlx5_start_sync_reset_poll(dev); + return 0; } static void mlx5_fw_live_patch_event(struct work_struct *work) @@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) err ? "Failed" : "Sent"); return; } - mlx5_sync_reset_set_reset_requested(dev); + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); @@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - mlx5_sync_reset_clear_reset_requested(dev, false); + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); @@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) reset_abort_work); struct mlx5_core_dev *dev = fw_reset->dev; - if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; - - mlx5_sync_reset_clear_reset_requested(dev, true); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } From a042d7f5bb68c47f6e0e546ca367d14e1e4b25ba Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 10 Apr 2022 11:58:05 +0000 Subject: [PATCH 156/491] net/mlx5: Fix matching on inner TTC The cited commits didn't use proper matching on inner TTC as a result distribution of encapsulated packets wasn't symmetric between the physical ports. Fixes: 4c71ce50d2fe ("net/mlx5: Support partial TTC rules") Fixes: 8e25a2bc6687 ("net/mlx5: Lag, add support to create TTC tables for LAG port selection") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index a6592f9c3c05f..5be322528279a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -505,7 +505,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) struct ttc_params ttc_params = {}; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); - port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); if (IS_ERR(port_sel->inner.ttc)) return PTR_ERR(port_sel->inner.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index b63dec24747ab..b78f2ba25c19b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (test_bit(tt, params->ignore_dests)) + continue; rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, From 841e512ffb64898db6322c0619f6bbc41266d86f Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 26 Apr 2022 16:15:36 +0200 Subject: [PATCH 157/491] drm/bridge: ite-it6505: add missing Kconfig option select The IT6505 is using functions provided by the DRM_DP_HELPER driver. In order to avoid having the bridge enabled but the helper disabled, let's add a select in order to be sure that the DP helper functions are always available. Fixes: b5c84a9edcd4 ("drm/bridge: add it6505 driver") Signed-off-by: Fabien Parent Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20220426141536.274727-1-fparent@baylibre.com --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 007e5a282f67d..2145b08f95346 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -78,6 +78,7 @@ config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on OF select DRM_KMS_HELPER + select DRM_DP_HELPER select EXTCON help ITE IT6505 DisplayPort bridge chip driver. From 2ac2fab52917ae82cbca97cf6e5d2993530257ed Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 2 May 2022 18:22:38 +0900 Subject: [PATCH 158/491] iommu/dart: Add missing module owner to ops structure This is required to make loading this as a module work. Signed-off-by: Hector Martin Fixes: 46d1fb072e76 ("iommu/dart: Add DART iommu driver") Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20220502092238.30486-1-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 15b77f16cfa31..8af0242a90d91 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -773,6 +773,7 @@ static const struct iommu_ops apple_dart_iommu_ops = { .get_resv_regions = apple_dart_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during dart probe */ + .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = apple_dart_attach_dev, .detach_dev = apple_dart_detach_dev, From 5e469ed9764d4722c59562da13120bd2dc6834c5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Apr 2022 12:50:38 +0200 Subject: [PATCH 159/491] mac80211: fix rx reordering with non explicit / psmp ack policy When the QoS ack policy was set to non explicit / psmp ack, frames are treated as not being part of a BA session, which causes extra latency on reordering. Fix this by only bypassing reordering for packets with no-ack policy Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220420105038.36443-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index beb6b92eb7804..88d797fa82ff6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ From 5d087aa759eb82b8208411913f6c2158bd85abc0 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Wed, 20 Apr 2022 04:13:21 +0000 Subject: [PATCH 160/491] nl80211: validate S1G channel width Validate the S1G channel width input by user to ensure it matches that of the requested channel Signed-off-by: Kieran Frewen Signed-off-by: Bassem Dawood Link: https://lore.kernel.org/r/20220420041321.3788789-2-kieran.frewen@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 21e808fcb676c..aa6094c3c9b00 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3173,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (chandef->chan->band == NL80211_BAND_S1GHZ) { + /* User input error for channel width doesn't match channel */ + if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_CHANNEL_WIDTH], + "bad channel width"); + return -EINVAL; + } + } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); From e847ffe2d146cfd52980ca688d84358e024a6e70 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Wed, 20 Apr 2022 04:13:20 +0000 Subject: [PATCH 161/491] cfg80211: retrieve S1G operating channel number When retrieving the S1G channel number from IEs, we should retrieve the operating channel instead of the primary channel. The S1G operation element specifies the main channel of operation as the oper channel, unlike for HT and HE which specify their main channel of operation as the primary channel. Signed-off-by: Kieran Frewen Signed-off-by: Bassem Dawood Link: https://lore.kernel.org/r/20220420041321.3788789-1-kieran.frewen@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4a6d864329106..6d82bd9eaf8c7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) { struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data; - return s1gop->primary_ch; + return s1gop->oper_ch; } } else { tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen); From 86af062f40a73bf63321694e6bf637144f0383fe Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Thu, 28 Apr 2022 10:57:44 +0530 Subject: [PATCH 162/491] mac80211: Reset MBSSID parameters upon connection Currently MBSSID parameters in struct ieee80211_bss_conf are not reset upon connection. This could be problematic with some drivers in a scenario where the device first connects to a non-transmit BSS and then connects to a transmit BSS of a Multi BSS AP. The MBSSID parameters which are set after connecting to a non-transmit BSS will not be reset and the same parameters will be passed on to the driver during the subsequent connection to a transmit BSS of a Multi BSS AP. For example, firmware running on the ath11k device uses the Multi BSS data for tracking the beacon of a non-transmit BSS and reports the driver when there is a beacon miss. If we do not reset the MBSSID parameters during the subsequent connection to a transmit BSS, then the driver would have wrong MBSSID data and FW would be looking for an incorrect BSSID in the MBSSID beacon of a Multi BSS AP and reports beacon loss leading to an unstable connection. Reset the MBSSID parameters upon every connection to solve this problem. Fixes: 78ac51f81532 ("mac80211: support multi-bssid") Signed-off-by: Manikanta Pubbisetty Link: https://lore.kernel.org/r/20220428052744.27040-1-quic_mpubbise@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b30c724ca8d1..dc8aec1a5d3dd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, cbss->transmitted_bss->bssid); bss_conf->bssid_indicator = cbss->max_bssid_indicator; bss_conf->bssid_index = cbss->bssid_index; + } else { + bss_conf->nontransmitted = false; + memset(bss_conf->transmitter_bssid, 0, + sizeof(bss_conf->transmitter_bssid)); + bss_conf->bssid_indicator = 0; + bss_conf->bssid_index = 0; } /* From e9f3fb523dbf476dc86beea23f5b5ca8f9687c93 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 24 Apr 2022 18:17:50 -0500 Subject: [PATCH 163/491] mmc: sunxi-mmc: Fix DMA descriptors allocated above 32 bits Newer variants of the MMC controller support a 34-bit physical address space by using word addresses instead of byte addresses. However, the code truncates the DMA descriptor address to 32 bits before applying the shift. This breaks DMA for descriptors allocated above the 32-bit limit. Fixes: 3536b82e5853 ("mmc: sunxi: add support for A100 mmc controller") Signed-off-by: Samuel Holland Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220424231751.32053-1-samuel@sholland.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index c62afd2126925..46f9e2923d869 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host, pdes[i].buf_addr_ptr1 = cpu_to_le32(sg_dma_address(&data->sg[i]) >> host->cfg->idma_des_shift); - pdes[i].buf_addr_ptr2 = cpu_to_le32((u32)next_desc >> - host->cfg->idma_des_shift); + pdes[i].buf_addr_ptr2 = + cpu_to_le32(next_desc >> + host->cfg->idma_des_shift); } pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD); From 3e5a8e8494a8122fe4eb3f167662f406cab753b9 Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Sun, 24 Apr 2022 21:32:33 +0530 Subject: [PATCH 164/491] mmc: sdhci-msm: Reset GCC_SDCC_BCR register for SDHC Reset GCC_SDCC_BCR register before every fresh initilazation. This will reset whole SDHC-msm controller, clears the previous power control states and avoids, software reset timeout issues as below. [ 5.458061][ T262] mmc1: Reset 0x1 never completed. [ 5.462454][ T262] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [ 5.469065][ T262] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00007202 [ 5.475688][ T262] mmc1: sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 [ 5.482315][ T262] mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 [ 5.488927][ T262] mmc1: sdhci: Present: 0x01f800f0 | Host ctl: 0x00000000 [ 5.495539][ T262] mmc1: sdhci: Power: 0x00000000 | Blk gap: 0x00000000 [ 5.502162][ T262] mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x00000003 [ 5.508768][ T262] mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 [ 5.515381][ T262] mmc1: sdhci: Int enab: 0x00000000 | Sig enab: 0x00000000 [ 5.521996][ T262] mmc1: sdhci: ACmd stat: 0x00000000 | Slot int: 0x00000000 [ 5.528607][ T262] mmc1: sdhci: Caps: 0x362dc8b2 | Caps_1: 0x0000808f [ 5.535227][ T262] mmc1: sdhci: Cmd: 0x00000000 | Max curr: 0x00000000 [ 5.541841][ T262] mmc1: sdhci: Resp[0]: 0x00000000 | Resp[1]: 0x00000000 [ 5.548454][ T262] mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 [ 5.555079][ T262] mmc1: sdhci: Host ctl2: 0x00000000 [ 5.559651][ T262] mmc1: sdhci_msm: ----------- VENDOR REGISTER DUMP----------- [ 5.566621][ T262] mmc1: sdhci_msm: DLL sts: 0x00000000 | DLL cfg: 0x6000642c | DLL cfg2: 0x0020a000 [ 5.575465][ T262] mmc1: sdhci_msm: DLL cfg3: 0x00000000 | DLL usr ctl: 0x00010800 | DDR cfg: 0x80040873 [ 5.584658][ T262] mmc1: sdhci_msm: Vndr func: 0x00018a9c | Vndr func2 : 0xf88218a8 Vndr func3: 0x02626040 Fixes: 0eb0d9f4de34 ("mmc: sdhci-msm: Initial support for Qualcomm chipsets") Signed-off-by: Shaik Sajida Bhanu Acked-by: Adrian Hunter Reviewed-by: Philipp Zabel Tested-by: Konrad Dybcio Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1650816153-23797-1-git-send-email-quic_c_sbhanu@quicinc.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 50c71e0ba5e4e..ff9f5b63c337e 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "sdhci-pltfm.h" #include "cqhci.h" @@ -2482,6 +2483,43 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev, of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config); } +static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host) +{ + struct reset_control *reset; + int ret = 0; + + reset = reset_control_get_optional_exclusive(dev, NULL); + if (IS_ERR(reset)) + return dev_err_probe(dev, PTR_ERR(reset), + "unable to acquire core_reset\n"); + + if (!reset) + return ret; + + ret = reset_control_assert(reset); + if (ret) { + reset_control_put(reset); + return dev_err_probe(dev, ret, "core_reset assert failed\n"); + } + + /* + * The hardware requirement for delay between assert/deassert + * is at least 3-4 sleep clock (32.7KHz) cycles, which comes to + * ~125us (4/32768). To be on the safe side add 200us delay. + */ + usleep_range(200, 210); + + ret = reset_control_deassert(reset); + if (ret) { + reset_control_put(reset); + return dev_err_probe(dev, ret, "core_reset deassert failed\n"); + } + + usleep_range(200, 210); + reset_control_put(reset); + + return ret; +} static int sdhci_msm_probe(struct platform_device *pdev) { @@ -2529,6 +2567,10 @@ static int sdhci_msm_probe(struct platform_device *pdev) msm_host->saved_tuning_phase = INVALID_TUNING_PHASE; + ret = sdhci_msm_gcc_reset(&pdev->dev, host); + if (ret) + goto pltfm_free; + /* Setup SDCC bus voter clock. */ msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus"); if (!IS_ERR(msm_host->bus_clk)) { From 57831bfb5e78777dc399e351ed68ef77c3aee385 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sat, 19 Mar 2022 02:28:09 -0700 Subject: [PATCH 165/491] powerpc/pseries/vas: Use QoS credits from the userspace The user can change the QoS credits dynamically with the management console interface which notifies OS with sysfs. After returning from the OS interface successfully, the management console updates the hypervisor. Since the VAS capabilities in the hypervisor is not updated when the OS gets the update, the kernel is using the old total credits value from the hypervisor. Fix this issue by using the new QoS credits from the userspace instead of depending on VAS capabilities from the hypervisor. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/76d156f8af1e03cc09369d68e0bfad0c40031bcc.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas-sysfs.c | 19 +++++++++++++----- arch/powerpc/platforms/pseries/vas.c | 23 +++++++++++----------- arch/powerpc/platforms/pseries/vas.h | 2 +- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 909535ca513a0..ec65586cbeb39 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -27,22 +27,31 @@ struct vas_caps_entry { /* * This function is used to get the notification from the drmgr when - * QoS credits are changed. Though receiving the target total QoS - * credits here, get the official QoS capabilities from the hypervisor. + * QoS credits are changed. */ -static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps, +static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps, const char *buf, size_t count) { int err; u16 creds; err = kstrtou16(buf, 0, &creds); + /* + * The user space interface from the management console + * notifies OS with the new QoS credits and then the + * hypervisor. So OS has to use this new credits value + * and reconfigure VAS windows (close or reopen depends + * on the credits available) instead of depending on VAS + * QoS capabilities from the hypervisor. + */ if (!err) - err = vas_reconfig_capabilties(caps->win_type); + err = vas_reconfig_capabilties(caps->win_type, creds); if (err) return -EINVAL; + pr_info("Set QoS total credits %u\n", creds); + return count; } @@ -92,7 +101,7 @@ VAS_ATTR_RO(nr_total_credits); VAS_ATTR_RO(nr_used_credits); static struct vas_sysfs_entry update_total_credits_attribute = - __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); + __ATTR(update_total_credits, 0200, NULL, update_total_credits_store); static struct attribute *vas_def_capab_attrs[] = { &nr_total_credits_attribute.attr, diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 1f59d78c77a1f..ec643bbdb67fc 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -779,10 +779,10 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, * changes. Reconfig window configurations based on the credits * availability from this new capabilities. */ -int vas_reconfig_capabilties(u8 type) +int vas_reconfig_capabilties(u8 type, int new_nr_creds) { struct vas_cop_feat_caps *caps; - int old_nr_creds, new_nr_creds; + int old_nr_creds; struct vas_caps *vcaps; int rc = 0, nr_active_wins; @@ -795,12 +795,6 @@ int vas_reconfig_capabilties(u8 type) caps = &vcaps->caps; mutex_lock(&vas_pseries_mutex); - rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, - (u64)virt_to_phys(&hv_cop_caps)); - if (rc) - goto out; - - new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); old_nr_creds = atomic_read(&caps->nr_total_credits); @@ -832,7 +826,6 @@ int vas_reconfig_capabilties(u8 type) false); } -out: mutex_unlock(&vas_pseries_mutex); return rc; } @@ -850,7 +843,7 @@ static int pseries_vas_notifier(struct notifier_block *nb, struct of_reconfig_data *rd = data; struct device_node *dn = rd->dn; const __be32 *intserv = NULL; - int len, rc = 0; + int new_nr_creds, len, rc = 0; if ((action == OF_RECONFIG_ATTACH_NODE) || (action == OF_RECONFIG_DETACH_NODE)) @@ -862,7 +855,15 @@ static int pseries_vas_notifier(struct notifier_block *nb, if (!intserv) return NOTIFY_OK; - rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, + new_nr_creds); + } + if (rc) pr_err("Failed reconfig VAS capabilities with DLPAR\n"); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 34177881e9980..333ffa2f9f426 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -135,7 +135,7 @@ struct pseries_vas_window { }; int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); -int vas_reconfig_capabilties(u8 type); +int vas_reconfig_capabilties(u8 type, int new_nr_creds); int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); #ifdef CONFIG_PPC_VAS From 6d65028eb67dbb7627651adfc460d64196d38bd8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 2 May 2022 22:50:10 +1000 Subject: [PATCH 166/491] powerpc/vdso: Fix incorrect CFI in gettimeofday.S As reported by Alan, the CFI (Call Frame Information) in the VDSO time routines is incorrect since commit ce7d8056e38b ("powerpc/vdso: Prepare for switching VDSO to generic C implementation."). DWARF has a concept called the CFA (Canonical Frame Address), which on powerpc is calculated as an offset from the stack pointer (r1). That means when the stack pointer is changed there must be a corresponding CFI directive to update the calculation of the CFA. The current code is missing those directives for the changes to r1, which prevents gdb from being able to generate a backtrace from inside VDSO functions, eg: Breakpoint 1, 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) bt #0 0x00007ffff7f804dc in __kernel_clock_gettime () #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 #2 0x00007fffffffd960 in ?? () #3 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 Backtrace stopped: frame did not save the PC Alan helpfully describes some rules for correctly maintaining the CFI information: 1) Every adjustment to the current frame address reg (ie. r1) must be described, and exactly at the instruction where r1 changes. Why? Because stack unwinding might want to access previous frames. 2) If a function changes LR or any non-volatile register, the save location for those regs must be given. The CFI can be at any instruction after the saves up to the point that the reg is changed. (Exception: LR save should be described before a bl. not after) 3) If asychronous unwind info is needed then restores of LR and non-volatile regs must also be described. The CFI can be at any instruction after the reg is restored up to the point where the save location is (potentially) trashed. Fix the inability to backtrace by adding CFI directives describing the changes to r1, ie. satisfying rule 1. Also change the information for LR to point to the copy saved on the stack, not the value in r0 that will be overwritten by the function call. Finally, add CFI directives describing the save/restore of r2. With the fix gdb can correctly back trace and navigate up and down the stack: Breakpoint 1, 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) bt #0 0x00007ffff7f804dc in __kernel_clock_gettime () #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 #2 0x0000000100015b60 in gettime () #3 0x000000010000c8bc in print_long_format () #4 0x000000010000d180 in print_current_files () #5 0x00000001000054ac in main () (gdb) up #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 (gdb) #2 0x0000000100015b60 in gettime () (gdb) #3 0x000000010000c8bc in print_long_format () (gdb) #4 0x000000010000d180 in print_current_files () (gdb) #5 0x00000001000054ac in main () (gdb) Initial frame selected; you cannot go up. (gdb) down #4 0x000000010000d180 in print_current_files () (gdb) #3 0x000000010000c8bc in print_long_format () (gdb) #2 0x0000000100015b60 in gettime () (gdb) #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 (gdb) #0 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) Fixes: ce7d8056e38b ("powerpc/vdso: Prepare for switching VDSO to generic C implementation.") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Alan Modra Signed-off-by: Michael Ellerman Reviewed-by: Segher Boessenkool Link: https://lore.kernel.org/r/20220502125010.1319370-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso/gettimeofday.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index eb9c81e1c2185..0c4ecc8fec5a6 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -22,12 +22,15 @@ .macro cvdso_call funct call_time=0 .cfi_startproc PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM mflr r0 - .cfi_register lr, r0 PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF #ifdef __powerpc64__ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif get_datapage r5 .ifeq \call_time @@ -39,13 +42,15 @@ PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 #endif .ifeq \call_time cmpwi r3, 0 .endif mtlr r0 - .cfi_restore lr addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 crclr so .ifeq \call_time beqlr+ From 392bf51946c2463436a1ba237c1ec5865b234825 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 4 May 2022 13:39:58 +0100 Subject: [PATCH 167/491] iommu: Make sysfs robust for non-API groups Groups created by VFIO backends outside the core IOMMU API should never be passed directly into the API itself, however they still expose their standard sysfs attributes, so we can still stumble across them that way. Take care to consider those cases before jumping into our normal assumptions of a fully-initialised core API group. Fixes: 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Jason Gunthorpe Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/86ada41986988511a8424e84746dfe9ba7f87573.1651667683.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2c45b85b9fc2..857d4c2fd1a20 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -506,6 +506,13 @@ int iommu_get_group_resv_regions(struct iommu_group *group, list_for_each_entry(device, &group->devices, list) { struct list_head dev_resv_regions; + /* + * Non-API groups still expose reserved_regions in sysfs, + * so filter out calls that get here that way. + */ + if (!device->dev->iommu) + break; + INIT_LIST_HEAD(&dev_resv_regions); iommu_get_resv_regions(device->dev, &dev_resv_regions); ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); @@ -3019,7 +3026,7 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; - if (WARN_ON(!group)) + if (WARN_ON(!group) || !group->default_domain) return -EINVAL; if (sysfs_streq(buf, "identity")) From 19965d8259fdabc6806da92adda49684f5bcbec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 27 Apr 2022 01:57:15 +0200 Subject: [PATCH 168/491] drm/amdgpu: do not use passthrough mode in Xen dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While technically Xen dom0 is a virtual machine too, it does have access to most of the hardware so it doesn't need to be considered a "passthrough". Commit b818a5d37454 ("drm/amdgpu/gmc: use PCI BARs for APUs in passthrough") changed how FB is accessed based on passthrough mode. This breaks amdgpu in Xen dom0 with message like this: [drm:dc_dmub_srv_wait_idle [amdgpu]] *ERROR* Error waiting for DMUB idle: status=3 While the reason for this failure is unclear, the passthrough mode is not really necessary in Xen dom0 anyway. So, to unbreak booting affected kernels, disable passthrough mode in this case. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1985 Fixes: b818a5d37454 ("drm/amdgpu/gmc: use PCI BARs for APUs in passthrough") Signed-off-by: Marek Marczykowski-Górecki Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a025f080aa6a6..5e3756643da3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -24,6 +24,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -710,7 +711,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + /* passthrough mode exclus sriov mod */ + if (is_virtual_machine() && !xen_initial_domain()) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } From 3dfe85fa87b2a26bdbd292b66653bba065cf9941 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 19 Apr 2022 13:03:12 -0400 Subject: [PATCH 169/491] drm/amd/display: Avoid reading audio pattern past AUDIO_CHANNELS_COUNT A faulty receiver might report an erroneous channel count. We should guard against reading beyond AUDIO_CHANNELS_COUNT as that would overflow the dpcd_pattern_period array. Signed-off-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 22dabe596dfcc..95b5b5bfa1ffa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4440,7 +4440,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video &dpcd_pattern_type.value, sizeof(dpcd_pattern_type)); - channel_count = dpcd_test_mode.bits.channel_count + 1; + channel_count = min(dpcd_test_mode.bits.channel_count + 1, AUDIO_CHANNELS_COUNT); // read pattern periods for requested channels when sawTooth pattern is requested if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH || From 770fb0942c338545f93a584342b64848cff31efe Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 4 May 2022 11:07:45 -0700 Subject: [PATCH 170/491] MAINTAINERS: Update Josh Poimboeuf's email address Change to my kernel.org email address. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/1abc3de4b00dc6f915ac975a2ec29ed545d96dc4.1651687652.git.jpoimboe@redhat.com --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e85..1e1a2264792dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7499,7 +7499,7 @@ F: Documentation/hwmon/f71805f.rst F: drivers/hwmon/f71805f.c FADDR2LINE -M: Josh Poimboeuf +M: Josh Poimboeuf S: Maintained F: scripts/faddr2line @@ -11348,7 +11348,7 @@ F: drivers/mmc/host/litex_mmc.c N: litex LIVE PATCHING -M: Josh Poimboeuf +M: Josh Poimboeuf M: Jiri Kosina M: Miroslav Benes M: Petr Mladek @@ -14224,7 +14224,7 @@ F: lib/objagg.c F: lib/test_objagg.c OBJTOOL -M: Josh Poimboeuf +M: Josh Poimboeuf M: Peter Zijlstra S: Supported F: tools/objtool/ @@ -18792,7 +18792,7 @@ F: include/dt-bindings/reset/starfive-jh7100.h STATIC BRANCH/CALL M: Peter Zijlstra -M: Josh Poimboeuf +M: Josh Poimboeuf M: Jason Baron R: Steven Rostedt R: Ard Biesheuvel @@ -21444,7 +21444,7 @@ F: arch/x86/kernel/apic/x2apic_uv_x.c F: arch/x86/platform/uv/ X86 STACK UNWINDING -M: Josh Poimboeuf +M: Josh Poimboeuf M: Peter Zijlstra S: Supported F: arch/x86/include/asm/unwind*.h From b2b701b31e1c5827957c88e5c1f0c3dde1f55b2f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 29 Apr 2022 14:46:11 -0500 Subject: [PATCH 171/491] dt-bindings: pinctrl: Allow values for drive-push-pull and drive-open-drain A few platforms, at91 and tegra, use drive-push-pull and drive-open-drain with a 0 or 1 value. There's not really a need for values as '1' should be equivalent to no value (it wasn't treated that way) and drive-push-pull disabled is equivalent to drive-open-drain. So dropping the value can't be done without breaking existing OSs. As we don't want new cases, mark the case with values as deprecated. Cc: Arnd Bergmann Cc: Thierry Reding Cc: Jonathan Hunter Cc: Nicolas Ferre Cc: Claudiu Beznea Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220429194610.2741437-1-robh@kernel.org --- .../devicetree/bindings/pinctrl/pincfg-node.yaml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml index 4b22a9e3a4471..f5a121311f612 100644 --- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml @@ -52,11 +52,19 @@ properties: hardware supporting it the pull strength in Ohm. drive-push-pull: - type: boolean + oneOf: + - type: boolean + - $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 1 ] + deprecated: true description: drive actively high and low drive-open-drain: - type: boolean + oneOf: + - type: boolean + - $ref: /schemas/types.yaml#/definitions/uint32 + const: 1 # No known cases of 0 + deprecated: true description: drive with open drain drive-open-source: From caf83e494de965dbb5f8add655c526b9af6a96cb Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 May 2022 11:27:38 -0500 Subject: [PATCH 172/491] dt-bindings: Drop redundant 'maxItems/minItems' in if/then schemas Another round of removing redundant minItems/maxItems when 'items' list is specified. This time it is in if/then schemas as the meta-schema was failing to check this case. If a property has an 'items' list, then a 'minItems' or 'maxItems' with the same size as the list is redundant and can be dropped. Note that is DT schema specific behavior and not standard json-schema behavior. The tooling will fixup the final schema adding any unspecified minItems/maxItems. Signed-off-by: Rob Herring Acked-By: Vinod Koul Acked-by: Marc Kleine-Budde Acked-by: Mark Brown Acked-by: Ulf Hansson # For MMC Acked-by: Jonathan Cameron #for IIO Link: https://lore.kernel.org/r/20220503162738.3827041-1-robh@kernel.org --- .../bindings/clock/imx8m-clock.yaml | 4 ---- .../bindings/display/bridge/renesas,lvds.yaml | 4 ---- .../bindings/display/renesas,du.yaml | 23 ------------------- .../bindings/iio/adc/st,stm32-adc.yaml | 2 -- .../bindings/mmc/nvidia,tegra20-sdhci.yaml | 7 +----- .../devicetree/bindings/mtd/gpmi-nand.yaml | 2 -- .../bindings/net/can/bosch,c_can.yaml | 3 --- .../bindings/phy/brcm,sata-phy.yaml | 10 ++++---- .../bindings/rtc/allwinner,sun6i-a31-rtc.yaml | 10 -------- .../bindings/serial/samsung_uart.yaml | 4 ---- .../sound/allwinner,sun4i-a10-i2s.yaml | 1 - .../bindings/sound/ti,j721e-cpb-audio.yaml | 2 -- .../bindings/thermal/rcar-gen3-thermal.yaml | 1 - 13 files changed, 5 insertions(+), 68 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml index 625f573a7b90e..458c7645ee683 100644 --- a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml @@ -55,8 +55,6 @@ allOf: then: properties: clocks: - minItems: 7 - maxItems: 7 items: - description: 32k osc - description: 25m osc @@ -66,8 +64,6 @@ allOf: - description: ext3 clock input - description: ext4 clock input clock-names: - minItems: 7 - maxItems: 7 items: - const: ckil - const: osc_25m diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml index a51baf8a4c768..bb9dbfb9beaf5 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml +++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml @@ -95,7 +95,6 @@ then: properties: clocks: minItems: 1 - maxItems: 4 items: - description: Functional clock - description: EXTAL input clock @@ -104,7 +103,6 @@ then: clock-names: minItems: 1 - maxItems: 4 items: - const: fck # The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks. @@ -128,12 +126,10 @@ then: else: properties: clocks: - maxItems: 1 items: - description: Functional clock clock-names: - maxItems: 1 items: - const: fck diff --git a/Documentation/devicetree/bindings/display/renesas,du.yaml b/Documentation/devicetree/bindings/display/renesas,du.yaml index 56cedcd6d5768..b3e588022082d 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.yaml +++ b/Documentation/devicetree/bindings/display/renesas,du.yaml @@ -109,7 +109,6 @@ allOf: properties: clocks: minItems: 1 - maxItems: 3 items: - description: Functional clock - description: DU_DOTCLKIN0 input clock @@ -117,7 +116,6 @@ allOf: clock-names: minItems: 1 - maxItems: 3 items: - const: du.0 - pattern: '^dclkin\.[01]$' @@ -159,7 +157,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -168,7 +165,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -216,7 +212,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -225,7 +220,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -271,7 +265,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -280,7 +273,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -327,7 +319,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -336,7 +327,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -386,7 +376,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -397,7 +386,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -448,7 +436,6 @@ allOf: properties: clocks: minItems: 4 - maxItems: 8 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -461,7 +448,6 @@ allOf: clock-names: minItems: 4 - maxItems: 8 items: - const: du.0 - const: du.1 @@ -525,7 +511,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -536,7 +521,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -596,7 +580,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -607,7 +590,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -666,14 +648,12 @@ allOf: properties: clocks: minItems: 1 - maxItems: 2 items: - description: Functional clock for DU0 - description: DU_DOTCLKIN0 input clock clock-names: minItems: 1 - maxItems: 2 items: - const: du.0 - const: dclkin.0 @@ -723,7 +703,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -732,7 +711,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -791,7 +769,6 @@ allOf: - description: Functional clock clock-names: - maxItems: 1 items: - const: du.0 diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml index 4d6074518b5cd..fa8da42cb1e6b 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -138,7 +138,6 @@ allOf: - const: bus - const: adc minItems: 1 - maxItems: 2 interrupts: items: @@ -170,7 +169,6 @@ allOf: - const: bus - const: adc minItems: 1 - maxItems: 2 interrupts: items: diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml index f3f4d5b027448..fe0270207622d 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml @@ -202,22 +202,17 @@ allOf: clocks: items: - description: module clock - minItems: 1 - maxItems: 1 else: properties: clocks: items: - description: module clock - description: timeout clock - minItems: 2 - maxItems: 2 + clock-names: items: - const: sdhci - const: tmclk - minItems: 2 - maxItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml index 9d764e654e1db..849aeae319a92 100644 --- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml @@ -147,8 +147,6 @@ allOf: - description: SoC gpmi io clock - description: SoC gpmi bch apb clock clock-names: - minItems: 2 - maxItems: 2 items: - const: gpmi_io - const: gpmi_bch_apb diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml index 8bad328b184df..51aa89ac7e850 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml @@ -80,8 +80,6 @@ if: then: properties: interrupts: - minItems: 4 - maxItems: 4 items: - description: Error and status IRQ - description: Message object IRQ @@ -91,7 +89,6 @@ then: else: properties: interrupts: - maxItems: 1 items: - description: Error and status IRQ diff --git a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml index cb1aa325336f8..435b971dfd9be 100644 --- a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml +++ b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml @@ -102,19 +102,17 @@ if: then: properties: reg: - maxItems: 2 + minItems: 2 + reg-names: - items: - - const: "phy" - - const: "phy-ctrl" + minItems: 2 else: properties: reg: maxItems: 1 + reg-names: maxItems: 1 - items: - - const: "phy" required: - compatible diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml index 0b767fec39d87..6b38bd7eb3b45 100644 --- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml @@ -71,7 +71,6 @@ allOf: then: properties: clock-output-names: - minItems: 1 maxItems: 1 - if: @@ -102,7 +101,6 @@ allOf: properties: clock-output-names: minItems: 3 - maxItems: 3 - if: properties: @@ -113,16 +111,12 @@ allOf: then: properties: clocks: - minItems: 3 - maxItems: 3 items: - description: Bus clock for register access - description: 24 MHz oscillator - description: 32 kHz clock from the CCU clock-names: - minItems: 3 - maxItems: 3 items: - const: bus - const: hosc @@ -142,7 +136,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 4 items: - description: Bus clock for register access - description: 24 MHz oscillator @@ -151,7 +144,6 @@ allOf: clock-names: minItems: 3 - maxItems: 4 items: - const: bus - const: hosc @@ -174,14 +166,12 @@ allOf: then: properties: interrupts: - minItems: 1 maxItems: 1 else: properties: interrupts: minItems: 2 - maxItems: 2 required: - "#clock-cells" diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml index d4688e317fc54..901c1e2cea28c 100644 --- a/Documentation/devicetree/bindings/serial/samsung_uart.yaml +++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml @@ -100,7 +100,6 @@ allOf: maxItems: 3 clock-names: minItems: 2 - maxItems: 3 items: - const: uart - pattern: '^clk_uart_baud[0-1]$' @@ -118,11 +117,8 @@ allOf: then: properties: clocks: - minItems: 2 maxItems: 2 clock-names: - minItems: 2 - maxItems: 2 items: - const: uart - const: clk_uart_baud0 diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml index c21c807b667c4..34f6ee9de3922 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml @@ -89,7 +89,6 @@ allOf: properties: dmas: minItems: 1 - maxItems: 2 items: - description: RX DMA Channel - description: TX DMA Channel diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml index 6806f53a4aed4..20ea5883b7ff6 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml @@ -80,7 +80,6 @@ allOf: then: properties: clocks: - minItems: 6 items: - description: AUXCLK clock for McASP used by CPB audio - description: Parent for CPB_McASP auxclk (for 48KHz) @@ -107,7 +106,6 @@ allOf: then: properties: clocks: - maxItems: 4 items: - description: AUXCLK clock for McASP used by CPB audio - description: Parent for CPB_McASP auxclk (for 48KHz) diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml index f963204e0b162..1368d90da0e85 100644 --- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml @@ -67,7 +67,6 @@ then: properties: reg: minItems: 2 - maxItems: 3 items: - description: TSC1 registers - description: TSC2 registers From 5dc4630426511f641b7ac44fc550b8e21eafb237 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 2 May 2022 18:13:08 +0900 Subject: [PATCH 173/491] dt-bindings: pci: apple,pcie: Drop max-link-speed from example We no longer use these since 111659c2a570 (and they never worked anyway); drop them from the example to avoid confusion. Fixes: 111659c2a570 ("arm64: dts: apple: t8103: Remove PCIe max-link-speed properties") Signed-off-by: Hector Martin Reviewed-by: Alyssa Rosenzweig Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220502091308.28233-1-marcan@marcan.st --- Documentation/devicetree/bindings/pci/apple,pcie.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/apple,pcie.yaml b/Documentation/devicetree/bindings/pci/apple,pcie.yaml index 7f01e15fc81c2..daf602ac0d0fd 100644 --- a/Documentation/devicetree/bindings/pci/apple,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/apple,pcie.yaml @@ -142,7 +142,6 @@ examples: device_type = "pci"; reg = <0x0 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 152 0>; - max-link-speed = <2>; #address-cells = <3>; #size-cells = <2>; @@ -153,7 +152,6 @@ examples: device_type = "pci"; reg = <0x800 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 153 0>; - max-link-speed = <2>; #address-cells = <3>; #size-cells = <2>; @@ -164,7 +162,6 @@ examples: device_type = "pci"; reg = <0x1000 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 33 0>; - max-link-speed = <1>; #address-cells = <3>; #size-cells = <2>; From ef91271c65c12d36e4c2b61c61d4849fb6d11aa0 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Sun, 24 Apr 2022 16:01:03 +0800 Subject: [PATCH 174/491] RDMA/siw: Fix a condition race issue in MPA request processing The calling of siw_cm_upcall and detaching new_cep with its listen_cep should be atomistic semantics. Otherwise siw_reject may be called in a temporary state, e,g, siw_cm_upcall is called but the new_cep->listen_cep has not being cleared. This fixes a WARN: WARNING: CPU: 7 PID: 201 at drivers/infiniband/sw/siw/siw_cm.c:255 siw_cep_put+0x125/0x130 [siw] CPU: 2 PID: 201 Comm: kworker/u16:22 Kdump: loaded Tainted: G E 5.17.0-rc7 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: iw_cm_wq cm_work_handler [iw_cm] RIP: 0010:siw_cep_put+0x125/0x130 [siw] Call Trace: siw_reject+0xac/0x180 [siw] iw_cm_reject+0x68/0xc0 [iw_cm] cm_work_handler+0x59d/0xe20 [iw_cm] process_one_work+0x1e2/0x3b0 worker_thread+0x50/0x3a0 ? rescuer_thread+0x390/0x390 kthread+0xe5/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Link: https://lore.kernel.org/r/d528d83466c44687f3872eadcb8c184528b2e2d4.1650526554.git.chengyou@linux.alibaba.com Reported-by: Luis Chamberlain Reviewed-by: Bernard Metzler Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7acdd3c3a599d..17f34d584cd9e 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -968,14 +968,15 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_set_inuse(new_cep); rv = siw_proc_mpareq(new_cep); - siw_cep_set_free(new_cep); - if (rv != -EAGAIN) { siw_cep_put(cep); new_cep->listen_cep = NULL; - if (rv) + if (rv) { + siw_cep_set_free(new_cep); goto error; + } } + siw_cep_set_free(new_cep); } return; From a926a903b7dc39a8a949150258c09290998dd812 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 4 May 2022 15:28:17 -0500 Subject: [PATCH 175/491] RDMA/rxe: Do not call dev_mc_add/del() under a spinlock These routines were not intended to be called under a spinlock and will throw debugging warnings: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 13 PID: 3107 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x2f/0x50 CPU: 13 PID: 3107 Comm: python3 Tainted: G E 5.18.0-rc1+ #7 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:warn_bogus_irq_restore+0x2f/0x50 Call Trace: _raw_spin_unlock_irqrestore+0x75/0x80 rxe_attach_mcast+0x304/0x480 [rdma_rxe] ib_attach_mcast+0x88/0xa0 [ib_core] ib_uverbs_attach_mcast+0x186/0x1e0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xcd/0x140 [ib_uverbs] ib_uverbs_cmd_verbs+0xdb0/0xea0 [ib_uverbs] ib_uverbs_ioctl+0xd2/0x160 [ib_uverbs] do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Move them out of the spinlock, it is OK if there is some races setting up the MC reception at the ethernet layer with rbtree lookups. Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c") Link: https://lore.kernel.org/r/20220504202817.98247-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 51 ++++++++++++--------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index ae8f11cb704af..77e45cabd8ea0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) } /** - * rxe_mcast_delete - delete multicast address from rxe device + * rxe_mcast_del - delete multicast address from rxe device * @rxe: rxe device object * @mgid: multicast address as a gid * * Returns 0 on success else an error */ -static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) { unsigned char ll_addr[ETH_ALEN]; @@ -159,17 +159,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) * @mcg: new mcg object * * Context: caller should hold rxe->mcg lock - * Returns: 0 on success else an error */ -static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mcg *mcg) +static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) { - int err; - - err = rxe_mcast_add(rxe, mgid); - if (unlikely(err)) - return err; - kref_init(&mcg->ref_cnt); memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); INIT_LIST_HEAD(&mcg->qp_list); @@ -184,8 +177,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, */ kref_get(&mcg->ref_cnt); __rxe_insert_mcg(mcg); - - return 0; } /** @@ -209,6 +200,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) if (mcg) return mcg; + /* check to see if we have reached limit */ + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { + err = -ENOMEM; + goto err_dec; + } + /* speculative alloc of new mcg */ mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); if (!mcg) @@ -218,27 +215,23 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) /* re-check to see if someone else just added it */ tmp = __rxe_lookup_mcg(rxe, mgid); if (tmp) { + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + atomic_dec(&rxe->mcg_num); kfree(mcg); - mcg = tmp; - goto out; - } - - if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { - err = -ENOMEM; - goto err_dec; + return tmp; } - err = __rxe_init_mcg(rxe, mgid, mcg); - if (err) - goto err_dec; -out: + __rxe_init_mcg(rxe, mgid, mcg); spin_unlock_irqrestore(&rxe->mcg_lock, flags); - return mcg; + /* add mcast address outside of lock */ + err = rxe_mcast_add(rxe, mgid); + if (!err) + return mcg; + + kfree(mcg); err_dec: atomic_dec(&rxe->mcg_num); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); - kfree(mcg); return ERR_PTR(err); } @@ -268,7 +261,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) __rxe_remove_mcg(mcg); kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); - rxe_mcast_delete(mcg->rxe, &mcg->mgid); atomic_dec(&rxe->mcg_num); } @@ -282,6 +274,9 @@ static void rxe_destroy_mcg(struct rxe_mcg *mcg) { unsigned long flags; + /* delete mcast address outside of lock */ + rxe_mcast_del(mcg->rxe, &mcg->mgid); + spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); __rxe_destroy_mcg(mcg); spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); From bfdc0edd11f9501b891a069b5bbd3b16731941e1 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 4 May 2022 15:28:17 -0500 Subject: [PATCH 176/491] RDMA/rxe: Change mcg_lock to a _bh lock rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock while rxe_recv.c uses _bh spinlocks for the same lock. As there is no case where the mcg_lock can be taken from an IRQ, change these all to bh locks so we don't have confusing mismatched lock types on the same spinlock. Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c") Link: https://lore.kernel.org/r/20220504202817.98247-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++---------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 77e45cabd8ea0..873a9b10307c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { struct rxe_mcg *mcg; - unsigned long flags; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); mcg = __rxe_lookup_mcg(rxe, mgid); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return mcg; } @@ -189,7 +188,6 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { struct rxe_mcg *mcg, *tmp; - unsigned long flags; int err; if (rxe->attr.max_mcast_grp == 0) @@ -211,18 +209,18 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) if (!mcg) return ERR_PTR(-ENOMEM); - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); /* re-check to see if someone else just added it */ tmp = __rxe_lookup_mcg(rxe, mgid); if (tmp) { - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); atomic_dec(&rxe->mcg_num); kfree(mcg); return tmp; } __rxe_init_mcg(rxe, mgid, mcg); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); /* add mcast address outside of lock */ err = rxe_mcast_add(rxe, mgid); @@ -272,14 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) */ static void rxe_destroy_mcg(struct rxe_mcg *mcg) { - unsigned long flags; - /* delete mcast address outside of lock */ rxe_mcast_del(mcg->rxe, &mcg->mgid); - spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); + spin_lock_bh(&mcg->rxe->mcg_lock); __rxe_destroy_mcg(mcg); - spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); + spin_unlock_bh(&mcg->rxe->mcg_lock); } /** @@ -334,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) { struct rxe_dev *rxe = mcg->rxe; struct rxe_mca *mca, *tmp; - unsigned long flags; int err; /* check to see if the qp is already a member of the group */ - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); list_for_each_entry(mca, &mcg->qp_list, qp_list) { if (mca->qp == qp) { - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return 0; } } - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); /* speculative alloc new mca without using GFP_ATOMIC */ mca = kzalloc(sizeof(*mca), GFP_KERNEL); if (!mca) return -ENOMEM; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); /* re-check to see if someone else just attached qp */ list_for_each_entry(tmp, &mcg->qp_list, qp_list) { if (tmp->qp == qp) { @@ -366,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) if (err) kfree(mca); out: - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return err; } @@ -400,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) { struct rxe_dev *rxe = mcg->rxe; struct rxe_mca *mca, *tmp; - unsigned long flags; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { if (mca->qp == qp) { __rxe_cleanup_mca(mca, mcg); @@ -416,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) if (atomic_read(&mcg->qp_num) <= 0) __rxe_destroy_mcg(mcg); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return 0; } } /* we didn't find the qp on the list */ - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return -EINVAL; } From 59f5ede3bc0f00eb856425f636dab0c10feb06d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 1 May 2022 21:31:43 +0200 Subject: [PATCH 177/491] x86/fpu: Prevent FPU state corruption The FPU usage related to task FPU management is either protected by disabling interrupts (switch_to, return to user) or via fpregs_lock() which is a wrapper around local_bh_disable(). When kernel code wants to use the FPU then it has to check whether it is possible by calling irq_fpu_usable(). But the condition in irq_fpu_usable() is wrong. It allows FPU to be used when: !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle() The latter is checking whether some other context already uses FPU in the kernel, but if that's not the case then it allows FPU to be used unconditionally even if the calling context interrupted a fpregs_lock() critical region. If that happens then the FPU state of the interrupted context becomes corrupted. Allow in kernel FPU usage only when no other context has in kernel FPU usage and either the calling context is not hard interrupt context or the hard interrupt did not interrupt a local bottomhalf disabled region. It's hard to find a proper Fixes tag as the condition was broken in one way or the other for a very long time and the eager/lazy FPU changes caused a lot of churn. Picked something remotely connected from the history. This survived undetected for quite some time as FPU usage in interrupt context is rare, but the recent changes to the random code unearthed it at least on a kernel which had FPU debugging enabled. There is probably a higher rate of silent corruption as not all issues can be detected by the FPU debugging code. This will be addressed in a subsequent change. Fixes: 5d2bd7009f30 ("x86, fpu: decouple non-lazy/eager fpu restore from xsave") Reported-by: Filipe Manana Signed-off-by: Thomas Gleixner Tested-by: Filipe Manana Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220501193102.588689270@linutronix.de --- arch/x86/kernel/fpu/core.c | 67 +++++++++++++++----------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c049561f373a0..e28ab0ecc5378 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -41,17 +41,7 @@ struct fpu_state_config fpu_user_cfg __ro_after_init; */ struct fpstate init_fpstate __ro_after_init; -/* - * Track whether the kernel is using the FPU state - * currently. - * - * This flag is used: - * - * - by IRQ context code to potentially use the FPU - * if it's unused. - * - * - to debug kernel_fpu_begin()/end() correctness - */ +/* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* @@ -59,42 +49,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static bool kernel_fpu_disabled(void) -{ - return this_cpu_read(in_kernel_fpu); -} - -static bool interrupted_kernel_fpu_idle(void) -{ - return !kernel_fpu_disabled(); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. */ bool irq_fpu_usable(void) { - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); + if (WARN_ON_ONCE(in_nmi())) + return false; + + /* In kernel FPU usage already active? */ + if (this_cpu_read(in_kernel_fpu)) + return false; + + /* + * When not in NMI or hard interrupt context, FPU can be used in: + * + * - Task context except from within fpregs_lock()'ed critical + * regions. + * + * - Soft interrupt processing context which cannot happen + * while in a fpregs_lock()'ed critical region. + */ + if (!in_hardirq()) + return true; + + /* + * In hard interrupt context it's safe when soft interrupts + * are enabled, which means the interrupt did not hit in + * a fpregs_lock()'ed critical region. + */ + return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); From ae2de669c14a18b5144cdacf49933ad400ed7e1c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:15 +0200 Subject: [PATCH 178/491] wireguard: selftests: make routing loop test non-fatal I hate to do this, but I still do not have a good solution to actually fix this bug across architectures. So just disable it for now, so that the CI can still deliver actionable results. This commit adds a large red warning, so that at least the failure isn't lost forever, and hopefully this can be revisited down the line. Link: https://lore.kernel.org/netdev/CAHmME9pv1x6C4TNdL6648HydD8r+txpV4hTUXOBVkrapBXH4QQ@mail.gmail.com/ Link: https://lore.kernel.org/netdev/YmszSXueTxYOC41G@zx2c4.com/ Link: https://lore.kernel.org/wireguard/CAHmME9rNnBiNvBstb7MPwK-7AmAN0sOfnhdR=eeLrowWcKxaaQ@mail.gmail.com/ Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 8a9461aa0878a..8a543200a61ad 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) ! n0 ping -W 1 -c 10 -f 192.168.241.2 || false sleep 1 read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) -(( tx_bytes_after - tx_bytes_before < 70000 )) +if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then + errstart=$'\x1b[37m\x1b[41m\x1b[1m' + errend=$'\x1b[0m' + echo "${errstart} ${errend}" + echo "${errstart} E R R O R ${errend}" + echo "${errstart} ${errend}" + echo "${errstart} This architecture does not do the right thing ${errend}" + echo "${errstart} with cross-namespace routing loops. This test ${errend}" + echo "${errstart} has thus technically failed but, as this issue ${errend}" + echo "${errstart} is as yet unsolved, these tests will continue ${errend}" + echo "${errstart} onward. :( ${errend}" + echo "${errstart} ${errend}" +fi ip0 link del wg1 ip1 link del wg0 From 39f02bf1e5ce9d72045de01e3d618ade1067158c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:16 +0200 Subject: [PATCH 179/491] wireguard: selftests: limit parallelism to $(nproc) tests at once The parallel tests were added to catch queueing issues from multiple cores. But what happens in reality when testing tons of processes is that these separate threads wind up fighting with the scheduler, and we wind up with contention in places we don't care about that decrease the chances of hitting a bug. So just do a test with the number of CPU cores, rather than trying to scale up arbitrarily. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 8a543200a61ad..69c7796c7ca92 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -22,10 +22,12 @@ # interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further # details on how this is accomplished. set -e +shopt -s extglob exec 3>&1 export LANG=C export WG_HIDE_KEYS=never +NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]} netns0="wg-test-$$-0" netns1="wg-test-$$-1" netns2="wg-test-$$-2" @@ -143,17 +145,15 @@ tests() { n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 # TCP over IPv4, in parallel - for max in 4 5 50; do - local pids=( ) - for ((i=0; i < max; ++i)) do - n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & - pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) - done - for ((i=0; i < max; ++i)) do - n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & - done - wait "${pids[@]}" + local pids=( ) i + for ((i=0; i < NPROC; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) done + for ((i=0; i < NPROC; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" From d5d9b29bc963cc084c5c0f3a7c28e2632a22e0c4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:17 +0200 Subject: [PATCH 180/491] wireguard: selftests: use newer toolchains to fill out architectures Rather than relying on the system to have cross toolchains available, simply download musl.cc's ones and use that libc.so, and then we use it to fill in a few missing platforms, such as riscv64, riscv64, powerpc64, and s390x. Since riscv doesn't have a second serial port in its device description, we have to use virtio's vport. This is actually the same situation on ARM, but we were previously hacking QEMU up to work around this, which required a custom QEMU. Instead just do the vport trick on ARM too. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../testing/selftests/wireguard/qemu/Makefile | 169 ++++++++++++------ .../wireguard/qemu/arch/aarch64.config | 5 +- .../wireguard/qemu/arch/aarch64_be.config | 5 +- .../selftests/wireguard/qemu/arch/arm.config | 5 +- .../wireguard/qemu/arch/armeb.config | 5 +- .../wireguard/qemu/arch/powerpc64.config | 13 ++ .../wireguard/qemu/arch/riscv32.config | 12 ++ .../wireguard/qemu/arch/riscv64.config | 12 ++ .../wireguard/qemu/arch/s390x.config | 6 + 9 files changed, 169 insertions(+), 63 deletions(-) create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv32.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv64.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/s390x.config diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 4bdd6c1a19d35..930f59c9e7148 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -4,26 +4,24 @@ PWD := $(shell pwd) -CHOST := $(shell gcc -dumpmachine) -HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) -ifneq (,$(ARCH)) -CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) -ifeq (,$(CBUILD)) -$(error The toolchain for $(ARCH) is not installed) -endif -else -CBUILD := $(CHOST) -ARCH := $(firstword $(subst -, ,$(CBUILD))) -endif - # Set these from the environment to override KERNEL_PATH ?= $(PWD)/../../../../.. BUILD_PATH ?= $(PWD)/build/$(ARCH) DISTFILES_PATH ?= $(PWD)/distfiles NR_CPUS ?= 4 +ARCH ?= +CBUILD := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CBUILD))) +ifeq ($(ARCH),) +ARCH := $(HOST_ARCH) +endif MIRROR := https://download.wireguard.com/qemu-test/distfiles/ +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) + default: qemu # variable name, tarball project name, version, tarball extension, default URI base @@ -36,12 +34,11 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) endef define file_download = -$(DISTFILES_PATH)/$(1): +$(DISTFILES_PATH)/$(1): | $(4) mkdir -p $(DISTFILES_PATH) - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) @@ -50,28 +47,20 @@ $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) $(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) -KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) - -export CFLAGS ?= -O3 -pipe -export LDFLAGS ?= -export CPPFLAGS := -I$(BUILD_PATH)/include - +export CFLAGS := -O3 -pipe ifeq ($(HOST_ARCH),$(ARCH)) -CROSS_COMPILE_FLAG := --host=$(CHOST) CFLAGS += -march=native -STRIP := strip -else -$(info Cross compilation: building for $(CBUILD) using $(CHOST)) -CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) -export CROSS_COMPILE=$(CBUILD)- -STRIP := $(CBUILD)-strip endif +export LDFLAGS := +export CPPFLAGS := + +QEMU_VPORT_RESULT := ifeq ($(ARCH),aarch64) +CHOST := aarch64-linux-musl QEMU_ARCH := aarch64 KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),aarch64_be) +CHOST := aarch64_be-linux-musl QEMU_ARCH := aarch64 KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),arm) +CHOST := arm-linux-musleabi QEMU_ARCH := arm KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux endif else ifeq ($(ARCH),armeb) +CHOST := armeb-linux-musleabi QEMU_ARCH := arm KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due LDFLAGS += -Wl,--be8 endif else ifeq ($(ARCH),x86_64) +CHOST := x86_64-linux-musl QEMU_ARCH := x86_64 KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage @@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35 CFLAGS += -march=skylake-avx512 endif else ifeq ($(ARCH),i686) +CHOST := i686-linux-musl QEMU_ARCH := i386 KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage @@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35 CFLAGS += -march=prescott endif else ifeq ($(ARCH),mips64) +CHOST := mips64-linux-musl QEMU_ARCH := mips64 KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 CFLAGS += -march=mips64r2 -EB endif else ifeq ($(ARCH),mips64el) +CHOST := mips64el-linux-musl QEMU_ARCH := mips64el KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 CFLAGS += -march=mips64r2 -EL endif else ifeq ($(ARCH),mips) +CHOST := mips-linux-musl QEMU_ARCH := mips KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 CFLAGS += -march=mips32r2 -EB endif else ifeq ($(ARCH),mipsel) +CHOST := mipsel-linux-musl QEMU_ARCH := mipsel KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -173,7 +174,18 @@ else QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 CFLAGS += -march=mips32r2 -EL endif +else ifeq ($(ARCH),powerpc64) +CHOST := powerpc64-linux-musl +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif else ifeq ($(ARCH),powerpc64le) +CHOST := powerpc64le-linux-musl QEMU_ARCH := ppc64 KERNEL_ARCH := powerpc KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries else QEMU_MACHINE := -machine pseries endif -CFLAGS += -mcpu=powerpc64le -mlong-double-64 else ifeq ($(ARCH),powerpc) +CHOST := powerpc-linux-musl QEMU_ARCH := ppc KERNEL_ARCH := powerpc KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage @@ -192,26 +204,72 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 else QEMU_MACHINE := -machine ppce500 endif -CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt else ifeq ($(ARCH),m68k) +CHOST := m68k-linux-musl QEMU_ARCH := m68k KERNEL_ARCH := m68k KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE) else QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) endif +else ifeq ($(ARCH),riscv64) +CHOST := riscv64-linux-musl +QEMU_ARCH := riscv64 +KERNEL_ARCH := riscv +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine virt +else +QEMU_MACHINE := -cpu rv64 -machine virt +endif +else ifeq ($(ARCH),riscv32) +CHOST := riscv32-linux-musl +QEMU_ARCH := riscv32 +KERNEL_ARCH := riscv +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine virt +else +QEMU_MACHINE := -cpu rv32 -machine virt +endif +else ifeq ($(ARCH),s390x) +CHOST := s390x-linux-musl +QEMU_ARCH := s390x +KERNEL_ARCH := s390 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config) +QEMU_VPORT_RESULT := virtio-serial-ccw +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) else -$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x) +endif + +TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz +TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME) +TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross +TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/ +$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8)) +$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123)) -REAL_CC := $(CBUILD)-gcc -MUSL_CC := $(BUILD_PATH)/musl-gcc -export CC := $(MUSL_CC) -USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed +STRIP := $(CHOST)-strip +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +$(info Building for $(CHOST) using $(CBUILD)) +export CROSS_COMPILE := $(CHOST)- +export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) +export CC := $(CHOST)-gcc + +USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed +comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result @@ -222,13 +280,14 @@ qemu: $(KERNEL_BZIMAGE) $(QEMU_MACHINE) \ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ -serial stdio \ - -serial file:$(BUILD_PATH)/result \ + -chardev file,path=$(BUILD_PATH)/result,id=result \ + $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \ -no-reboot \ -monitor none \ -kernel $< grep -Fq success $(BUILD_PATH)/result -$(BUILD_PATH)/init-cpio-spec.txt: +$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init mkdir -p $(BUILD_PATH) echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ @@ -246,10 +305,10 @@ $(BUILD_PATH)/init-cpio-spec.txt: echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ echo "slink /bin/ping6 ping 777 0 0" >> $@ echo "dir /lib 755 0 0" >> $@ - echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ - echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@ + echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@ -$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config +$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config mkdir -p $(KERNEL_BUILD_PATH) cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config @@ -258,29 +317,20 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) -$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) +$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config - $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install +$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed + rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install touch $@ -$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) +$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) - $(MAKE) -C $(MUSL_PATH) - $(STRIP) -s $@ - -$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so - $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so touch $@ -$(MUSL_CC): $(MUSL_PATH)/lib/libc.so - sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs - printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc - chmod +x $(BUILD_PATH)/musl-gcc - $(IPERF_PATH)/.installed: $(IPERF_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< @@ -289,6 +339,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR) touch $@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && autoreconf -fi cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no $(MAKE) -C $(IPERF_PATH) $(STRIP) -s $@ @@ -304,7 +355,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) mkdir -p $(BUILD_PATH) - $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< $(STRIP) -s $@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config index 3d063bb247bbe..e9ac41f6b3ae8 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -1,5 +1,8 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config index dbdc7e406a7ba..03609a203e8ca 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -1,6 +1,9 @@ CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index 148f499054182..c616124fdd593 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index bd76b07d00a2c..d3a40a974e163 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_CPU_BIG_ENDIAN=y CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config new file mode 100644 index 0000000000000..c19c7b519d8bf --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config @@ -0,0 +1,13 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 +CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config new file mode 100644 index 0000000000000..ea53e78e923e9 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -0,0 +1,12 @@ +CONFIG_ARCH_RV32I=y +CONFIG_MMU=y +CONFIG_FPU=y +CONFIG_SOC_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config new file mode 100644 index 0000000000000..4a08d8c1d4afc --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config @@ -0,0 +1,12 @@ +CONFIG_ARCH_RV64I=y +CONFIG_MMU=y +CONFIG_FPU=y +CONFIG_SOC_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config new file mode 100644 index 0000000000000..274a44f4e49cf --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config @@ -0,0 +1,6 @@ +CONFIG_SCLP_VT220_TTY=y +CONFIG_SCLP_VT220_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_S390_GUEST=y +CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1" From d261ba6aa411e03c27da266b7df4bef771e8105e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:18 +0200 Subject: [PATCH 181/491] wireguard: selftests: restore support for ccache When moving to non-system toolchains, we inadvertantly killed the ability to use ccache. So instead, build ccache support into the test harness directly. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../selftests/wireguard/qemu/.gitignore | 1 + .../testing/selftests/wireguard/qemu/Makefile | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore index bfa15e6feb2ff..42ab9d72b37b3 100644 --- a/tools/testing/selftests/wireguard/qemu/.gitignore +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only build/ distfiles/ +ccache/ diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 930f59c9e7148..e121ef3878afd 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -266,6 +266,13 @@ $(info Building for $(CHOST) using $(CBUILD)) export CROSS_COMPILE := $(CHOST)- export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) export CC := $(CHOST)-gcc +CCACHE_PATH := $(shell which ccache 2>/dev/null) +ifneq ($(CCACHE_PATH),) +export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015 +export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH) +export CCACHE_SLOPPINESS := file_macro,time_macros +export CCACHE_DIR ?= $(PWD)/ccache +endif USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed @@ -329,6 +336,10 @@ $(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so +ifneq ($(CCACHE_PATH),) + mkdir -p $(TOOLCHAIN_PATH)/bin/ccache + ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC) +endif touch $@ $(IPERF_PATH)/.installed: $(IPERF_TAR) @@ -421,8 +432,13 @@ clean: distclean: clean rm -rf $(DISTFILES_PATH) +cacheclean: clean +ifneq ($(CCACHE_DIR),) + rm -rf $(CCACHE_DIR) +endif + menuconfig: $(KERNEL_BUILD_PATH)/.config $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig -.PHONY: qemu build clean distclean menuconfig +.PHONY: qemu build clean distclean cacheclean menuconfig .DELETE_ON_ERROR: From a6b8ea9144340c0aaa66c817a3bbb6bca47f0321 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:19 +0200 Subject: [PATCH 182/491] wireguard: selftests: bump package deps Use newer, more reliable package dependencies. These should hopefully reduce flakes. However, we keep the old iputils package, as it accumulated bugs after resulting in flakes on slow machines. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../testing/selftests/wireguard/qemu/Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e121ef3878afd..bca07b93eeb07 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -39,13 +39,13 @@ $(DISTFILES_PATH)/$(1): | $(4) flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) -$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) -$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) -$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) -$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) +$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f)) +$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0)) +$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac)) export CFLAGS := -O3 -pipe ifeq ($(HOST_ARCH),$(ARCH)) @@ -385,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR) touch $@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) - cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble $(MAKE) -C $(BASH_PATH) $(STRIP) -s $@ $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk - printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk + printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile touch $@ $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) From 3fc1b11e5d7278437bdfff0e01f51e777eefb222 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:20 +0200 Subject: [PATCH 183/491] wireguard: selftests: set panic_on_warn=1 from cmdline Rather than setting this once init is running, set panic_on_warn from the kernel command line, so that it catches splats from WireGuard initialization code and the various crypto selftests. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/arch/aarch64.config | 2 +- .../testing/selftests/wireguard/qemu/arch/aarch64_be.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/arm.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/armeb.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/i686.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/m68k.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips64.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips64el.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mipsel.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/powerpc.config | 2 +- .../testing/selftests/wireguard/qemu/arch/powerpc64.config | 2 +- .../selftests/wireguard/qemu/arch/powerpc64le.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/riscv32.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/riscv64.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/s390x.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/x86_64.config | 2 +- tools/testing/selftests/wireguard/qemu/init.c | 6 ------ 18 files changed, 17 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config index e9ac41f6b3ae8..09016880ce035 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -4,5 +4,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config index 03609a203e8ca..19ff66e4c602e 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -5,5 +5,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index c616124fdd593..fc7959bef9c25 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -8,5 +8,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index d3a40a974e163..f3066be81c199 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -8,6 +8,6 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CPU_BIG_ENDIAN=y CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index a9b4fe7950480..6d90892a85a24 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -2,5 +2,5 @@ CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config index 62a15bdb877e7..82c925e49beb7 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -5,5 +5,5 @@ CONFIG_MAC=y CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config index df71d6b95546f..d7ec63c17b30e 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config index 90c783f725c4d..0994947e33925 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config @@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config index 435b0b43e00cb..591184342f471 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config @@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config index 62bb50c4a85fc..18a4982937376 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config index 57957093b71b8..5e04882e8e35b 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_MATH_EMULATION=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config index c19c7b519d8bf..737194b7619e0 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config @@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_BIG_ENDIAN=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config index f52f1e2bc7f64..8148b9d1220a4 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config index ea53e78e923e9..0bd0e72d95d49 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config index 4a08d8c1d4afc..dc266f3b19155 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config @@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config index 274a44f4e49cf..a7b44dca0b0ac 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/s390x.config +++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config @@ -3,4 +3,4 @@ CONFIG_SCLP_VT220_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_CONSOLE=y CONFIG_S390_GUEST=y -CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1" diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config index 45dd53a0d7600..efa00693e08bf 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -2,5 +2,5 @@ CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index 0b45055d9de0e..2a0f48fac925a 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -110,12 +110,6 @@ static void enable_logging(void) panic("write(exception-trace)"); close(fd); } - fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); - if (fd >= 0) { - if (write(fd, "1\n", 2) != 2) - panic("write(panic_on_warn)"); - close(fd); - } } static void kmod_selftests(void) From b2d057560b8107c633b39aabe517ff9d93f285e3 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:08 +0200 Subject: [PATCH 184/491] secure_seq: use the 64 bits of the siphash for port offset calculation SipHash replaced MD5 in secure_ipv{4,6}_port_ephemeral() via commit 7cd23e5300c1 ("secure_seq: use SipHash in place of MD5"), but the output remained truncated to 32-bit only. In order to exploit more bits from the hash, let's make the functions return the full 64-bit of siphash_3u32(). We also make sure the port offset calculation in __inet_hash_connect() remains done on 32-bit to avoid the need for div_u64_rem() and an extra cost on 32-bit systems. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 2 +- include/net/secure_seq.h | 4 ++-- net/core/secure_seq.c | 4 ++-- net/ipv4/inet_hashtables.c | 10 ++++++---- net/ipv6/inet6_hashtables.c | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f72ec113ae568..98e1ec1a14f03 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c27..dac91aa38c5af 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@ #include -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 9b8443774449f..55aa5cc258e3b 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -94,7 +94,7 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { @@ -142,7 +142,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq); -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_3u32((__force u32)saddr, (__force u32)daddr, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 17440840a7914..9d24d9319f3dc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -504,7 +504,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -734,7 +734,7 @@ EXPORT_SYMBOL_GPL(inet_unhash); static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { @@ -777,7 +777,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, sizeof(table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); - offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining; + offset = READ_ONCE(table_perturb[index]) + port_offset; + offset %= remaining; + /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -859,7 +861,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4740afecf7c62..32ccac10bd625 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -308,7 +308,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); From 9e9b70ae923baf2b5e8a0ea4fd0c8451801ac526 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:09 +0200 Subject: [PATCH 185/491] tcp: use different parts of the port_offset for index and offset Amit Klein suggests that we use different parts of port_offset for the table's index and the port offset so that there is no direct relation between them. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9d24d9319f3dc..29c701cd83121 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -777,7 +777,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, sizeof(table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); - offset = READ_ONCE(table_perturb[index]) + port_offset; + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); offset %= remaining; /* In first pass we try ports of @low parity. From 4dfa9b438ee34caca4e6a4e5e961641807367f6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 May 2022 10:46:10 +0200 Subject: [PATCH 186/491] tcp: resalt the secret every 10 seconds In order to limit the ability for an observer to recognize the source ports sequence used to contact a set of destinations, we should periodically shuffle the secret. 10 seconds looks effective enough without causing particular issues. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Cc: Jason A. Donenfeld Tested-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/secure_seq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 55aa5cc258e3b..5f85e01d4093b 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -22,6 +22,8 @@ static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; +#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) + static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); @@ -100,11 +102,13 @@ u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, const struct { struct in6_addr saddr; struct in6_addr daddr; + unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, - .dport = dport + .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), @@ -145,8 +149,10 @@ EXPORT_SYMBOL_GPL(secure_tcp_seq); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); - return siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u16)dport, &net_secret); + return siphash_4u32((__force u32)saddr, (__force u32)daddr, + (__force u16)dport, + jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif From ca7af0402550f9a0b3316d5f1c30904e42ed257d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:11 +0200 Subject: [PATCH 187/491] tcp: add small random increments to the source port Here we're randomly adding between 0 and 7 random increments to the selected source port in order to add some noise in the source port selection that will make the next port less predictable. With the default port range of 32768-60999 this means a worst case reuse scenario of 14116/8=1764 connections between two consecutive uses of the same port, with an average of 14116/4.5=3137. This code was stressed at more than 800000 connections per second to a fixed target with all connections closed by the client using RSTs (worst condition) and only 2 connections failed among 13 billion, despite the hash being reseeded every 10 seconds, indicating a perfectly safe situation. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 29c701cd83121..63bb4902f0186 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -833,11 +833,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; ok: - /* If our first attempt found a candidate, skip next candidate - * in 1/16 of cases to add some noise. + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. */ - if (!i && !(prandom_u32() % 16)) - i = 2; + i = max_t(int, i, (prandom_u32() & 7) * 2); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ From e9261476184be1abd486c9434164b2acbe0ed6c2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:12 +0200 Subject: [PATCH 188/491] tcp: dynamically allocate the perturb table used by source ports We'll need to further increase the size of this table and it's likely that at some point its size will not be suitable anymore for a static table. Let's allocate it on boot from inet_hashinfo2_init(), which is called from tcp_init(). Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 63bb4902f0186..48ca07853068d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -731,7 +731,8 @@ EXPORT_SYMBOL_GPL(inet_unhash); * privacy, this only consumes 1 KB of kernel memory. */ #define INET_TABLE_PERTURB_SHIFT 8 -static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; +#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, @@ -774,7 +775,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - net_get_random_once(table_perturb, sizeof(table_perturb)); + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); @@ -912,6 +914,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); + + /* this one is used for source ports of outgoing connections */ + table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, + sizeof(*table_perturb), GFP_KERNEL); + if (!table_perturb) + panic("TCP: failed to alloc table_perturb"); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) From 4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:13 +0200 Subject: [PATCH 189/491] tcp: increase source port perturb table to 2^16 Moshe Kol, Amit Klein, and Yossi Gilad reported being able to accurately identify a client by forcing it to emit only 40 times more connections than there are entries in the table_perturb[] table. The previous two improvements consisting in resalting the secret every 10s and adding randomness to each port selection only slightly improved the situation, and the current value of 2^8 was too small as it's not very difficult to make a client emit 10k connections in less than 10 seconds. Thus we're increasing the perturb table from 2^8 to 2^16 so that the same precision now requires 2.6M connections, which is more difficult in this time frame and harder to hide as a background activity. The impact is that the table now uses 256 kB instead of 1 kB, which could mostly affect devices making frequent outgoing connections. However such components usually target a small set of destinations (load balancers, database clients, perf assessment tools), and in practice only a few entries will be visited, like before. A live test at 1 million connections per second showed no performance difference from the previous value. Reported-by: Moshe Kol Reported-by: Yossi Gilad Reported-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 48ca07853068d..cc5f66328b474 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -726,11 +726,12 @@ EXPORT_SYMBOL_GPL(inet_unhash); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. - * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, - * we use 256 instead to really give more isolation and - * privacy, this only consumes 1 KB of kernel memory. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. */ -#define INET_TABLE_PERTURB_SHIFT 8 +#define INET_TABLE_PERTURB_SHIFT 16 #define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) static u32 *table_perturb; From e8161345ddbb66e449abde10d2fdce93f867eba9 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:14 +0200 Subject: [PATCH 190/491] tcp: drop the hash_32() part from the index calculation In commit 190cc82489f4 ("tcp: change source port randomizarion at connect() time"), the table_perturb[] array was introduced and an index was taken from the port_offset via hash_32(). But it turns out that hash_32() performs a multiplication while the input here comes from the output of SipHash in secure_seq, that is well distributed enough to avoid the need for yet another hash. Suggested-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index cc5f66328b474..a5d57fa679caa 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -778,7 +778,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); - index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); offset %= remaining; From 5a7c5f70c743c6cf32b44b05bd6b19d4ad82f49d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 3 May 2022 15:14:28 +0300 Subject: [PATCH 191/491] selftests: ocelot: tc_flower_chains: specify conform-exceed action for policer As discussed here with Ido Schimmel: https://patchwork.kernel.org/project/netdevbpf/patch/20220224102908.5255-2-jianbol@nvidia.com/ the default conform-exceed action is "reclassify", for a reason we don't really understand. The point is that hardware can't offload that police action, so not specifying "conform-exceed" was always wrong, even though the command used to work in hardware (but not in software) until the kernel started adding validation for it. Fix the command used by the selftest by making the policer drop on exceed, and pass the packet to the next action (goto) on conform. Fixes: 8cd6b020b644 ("selftests: ocelot: add some example VCAP IS1, IS2 and ES0 tc offloads") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220503121428.842906-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index eaf8a04a7ca5f..10e54bcca7a93 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -190,7 +190,7 @@ setup_prepare() tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ - action police rate 50mbit burst 64k \ + action police rate 50mbit burst 64k conform-exceed drop/pipe \ action goto chain $(IS2 1 0) } From 170f37d6aa6ad4582eefd7459015de79e244536e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 3 May 2022 00:09:31 -0400 Subject: [PATCH 192/491] block: Do not call folio_next() on an unreferenced folio It is unsafe to call folio_next() on a folio unless you hold a reference on it that prevents it from being split or freed. After returning from the iterator, iomap calls folio_end_writeback() which may drop the last reference to the page, or allow the page to be split. If that happens, the iterator will not advance far enough through the bio_vec, leading to assertion failures like the BUG() in folio_end_writeback() that checks we're not trying to end writeback on a page not currently under writeback. Other assertion failures were also seen, but they're all explained by this one bug. Fix the bug by remembering where the next folio starts before returning from the iterator. There are other ways of fixing this bug, but this seems the simplest. Reported-by: Darrick J. Wong Tested-by: Darrick J. Wong Reported-by: Brian Foster Tested-by: Brian Foster Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/bio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 278cc81cc1e7f..00450fd86bb43 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -269,6 +269,7 @@ struct folio_iter { size_t offset; size_t length; /* private: for use by the iterator */ + struct folio *_next; size_t _seg_count; int _i; }; @@ -283,6 +284,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, PAGE_SIZE * (bvec->bv_page - &fi->folio->page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); + fi->_next = folio_next(fi->folio); fi->_i = i; } @@ -290,9 +292,10 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) { fi->_seg_count -= fi->length; if (fi->_seg_count) { - fi->folio = folio_next(fi->folio); + fi->folio = fi->_next; fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); + fi->_next = folio_next(fi->folio); } else if (fi->_i + 1 < bio->bi_vcnt) { bio_first_folio(fi, bio, fi->_i + 1); } else { From b9ff43dd27434dbd850b908e2e0e1f6e794efd9b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 27 Apr 2022 17:01:28 -0400 Subject: [PATCH 193/491] mm/readahead: Fix readahead with large folios Reading 100KB chunks from a big file (eg dd bs=100K) leads to poor readahead behaviour. Studying the traces in detail, I noticed two problems. The first is that we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read. Instead, we need to set the readahead flag on the first folio _after_ the one which contains the last byte that we're reading. The second is that we were looking for the index of the folio with the readahead flag set to exactly match the start + size - async_size. If we've rounded this, either down (as previously) or up (as now), we'll think we hit a folio marked as readahead by a different read, and try to read the wrong pages. So round the expected index to the order of the folio we hit. Reported-by: Guo Xuenan Signed-off-by: Matthew Wilcox (Oracle) --- mm/readahead.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 8e3775829513e..4a60cdb64262a 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -474,7 +474,8 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - if (mark - index < (1UL << order)) + mark = round_up(mark, 1UL << order); + if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); if (err) @@ -555,8 +556,9 @@ static void ondemand_readahead(struct readahead_control *ractl, struct file_ra_state *ra = ractl->ra; unsigned long max_pages = ra->ra_pages; unsigned long add_pages; - unsigned long index = readahead_index(ractl); - pgoff_t prev_index; + pgoff_t index = readahead_index(ractl); + pgoff_t expected, prev_index; + unsigned int order = folio ? folio_order(folio) : 0; /* * If the request exceeds the readahead window, allow the read to @@ -575,8 +577,9 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - if ((index == (ra->start + ra->size - ra->async_size) || - index == (ra->start + ra->size))) { + expected = round_up(ra->start + ra->size - ra->async_size, + 1UL << order); + if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; @@ -662,7 +665,7 @@ static void ondemand_readahead(struct readahead_control *ractl, } ractl->_index = ra->start; - page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0); + page_cache_ra_order(ractl, ra, order); } void page_cache_sync_ra(struct readahead_control *ractl, From 4071bf121d59944d5cd2238de0642f3d7995a997 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 4 May 2022 13:58:47 +0800 Subject: [PATCH 194/491] NFC: netlink: fix sleep in atomic bug when firmware download timeout There are sleep in atomic bug that could cause kernel panic during firmware download process. The root cause is that nlmsg_new with GFP_KERNEL parameter is called in fw_dnld_timeout which is a timer handler. The call trace is shown below: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265 Call Trace: kmem_cache_alloc_node __alloc_skb nfc_genl_fw_download_done call_timer_fn __run_timers.part.0 run_timer_softirq __do_softirq ... The nlmsg_new with GFP_KERNEL parameter may sleep during memory allocation process, and the timer handler is run as the result of a "software interrupt" that should not call any other function that could sleep. This patch changes allocation mode of netlink message from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic bug. The GFP_ATOMIC flag makes memory allocation operation could be used in atomic context. Fixes: 9674da8759df ("NFC: Add firmware upload netlink command") Fixes: 9ea7187c53f6 ("NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD") Signed-off-by: Duoming Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220504055847.38026-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/nfc/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f184b0db79d40..7c62417ccfd78 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, genlmsg_end(msg, hdr); - genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; From 2d3535ed2c73fee356160aed40714b27be07442a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 2 May 2022 11:34:16 +0200 Subject: [PATCH 195/491] MAINTAINERS: update the GPIO git tree entry My git tree has become the de facto main GPIO tree. Update the MAINTAINERS file to reflect that. Signed-off-by: Bartosz Golaszewski Reported-by: Baruch Siach Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e85..9d47c5e7c6ae0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8385,7 +8385,7 @@ M: Linus Walleij M: Bartosz Golaszewski L: linux-gpio@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git F: Documentation/ABI/obsolete/sysfs-gpio F: Documentation/ABI/testing/gpio-cdev F: Documentation/admin-guide/gpio/ From 8707898e22fd665bc1d7b18b809be4b56ce25bdd Mon Sep 17 00:00:00 2001 From: Thomas Pfaff Date: Mon, 2 May 2022 13:28:29 +0200 Subject: [PATCH 196/491] genirq: Synchronize interrupt thread startup A kernel hang can be observed when running setserial in a loop on a kernel with force threaded interrupts. The sequence of events is: setserial open("/dev/ttyXXX") request_irq() do_stuff() -> serial interrupt -> wake(irq_thread) desc->threads_active++; close() free_irq() kthread_stop(irq_thread) synchronize_irq() <- hangs because desc->threads_active != 0 The thread is created in request_irq() and woken up, but does not get on a CPU to reach the actual thread function, which would handle the pending wake-up. kthread_stop() sets the should stop condition which makes the thread immediately exit, which in turn leaves the stale threads_active count around. This problem was introduced with commit 519cc8652b3a, which addressed a interrupt sharing issue in the PCIe code. Before that commit free_irq() invoked synchronize_irq(), which waits for the hard interrupt handler and also for associated threads to complete. To address the PCIe issue synchronize_irq() was replaced with __synchronize_hardirq(), which only waits for the hard interrupt handler to complete, but not for threaded handlers. This was done under the assumption, that the interrupt thread already reached the thread function and waits for a wake-up, which is guaranteed to be handled before acting on the stop condition. The problematic case, that the thread would not reach the thread function, was obviously overlooked. Make sure that the interrupt thread is really started and reaches thread_fn() before returning from __setup_irq(). This utilizes the existing wait queue in the interrupt descriptor. The wait queue is unused for non-shared interrupts. For shared interrupts the usage might cause a spurious wake-up of a waiter in synchronize_irq() or the completion of a threaded handler might cause a spurious wake-up of the waiter for the ready flag. Both are harmless and have no functional impact. [ tglx: Amended changelog ] Fixes: 519cc8652b3a ("genirq: Synchronize only with single thread on free_irq()") Signed-off-by: Thomas Pfaff Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/552fe7b4-9224-b183-bb87-a8f36d335690@pcs.com --- kernel/irq/internals.h | 2 ++ kernel/irq/irqdesc.c | 2 ++ kernel/irq/manage.c | 39 +++++++++++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 99cbdf55a8bda..f09c60393e559 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -29,12 +29,14 @@ extern struct irqaction chained_action; * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, + IRQTF_READY, }; /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 939d21cd55c38..0099b87dd8530 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -407,6 +407,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, lockdep_set_class(&desc->lock, &irq_desc_lock_class); mutex_init(&desc->request_mutex); init_rcu_head(&desc->rcu); + init_waitqueue_head(&desc->wait_for_threads); desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); @@ -575,6 +576,7 @@ int __init early_irq_init(void) raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); mutex_init(&desc[i].request_mutex); + init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c03f71d5ec101..e3e245a4fd70b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1248,6 +1248,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) raw_spin_unlock_irq(&desc->lock); } +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + /* * Interrupt handler thread */ @@ -1259,6 +1284,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + irq_thread_set_ready(desc, action); + sched_set_fifo(current); if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, @@ -1683,8 +1710,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1780,14 +1805,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; From 26a08f8bad3e1f98d3153f939fb8cd330da4cb26 Mon Sep 17 00:00:00 2001 From: Scott Chen Date: Mon, 25 Apr 2022 17:00:20 +0800 Subject: [PATCH 197/491] USB: serial: pl2303: add device id for HP LM930 Display Add the device id for the HPLM930Display which is a PL2303GC based device. Signed-off-by: Scott Chen Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 88b284d61681a..1d878d05a6584 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index c5406452b774e..732f9b13ad5d5 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -135,6 +135,7 @@ #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LD381_PRODUCT_ID 0x0f7f +#define HP_LM930_PRODUCT_ID 0x0f9b #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 From 714adff9a6271b5f1664b04c944b598141ebfe73 Mon Sep 17 00:00:00 2001 From: Sven Schwermer Date: Mon, 25 Apr 2022 16:34:49 +0200 Subject: [PATCH 198/491] USB: serial: option: add Fibocom L610 modem The L610 modem has 3 USB configurations that are configurable via the AT command AT+GTUSBMODE={31,32,33} which make the modem enumerate with the following interfaces, respectively: 31: Modem + NV + MOS + Diag + LOG + AT + AT 32: ECM + Modem + NV + MOS + Diag + LOG + AT + AT 33: RNDIS + Modem + NV + MOS + Diag + LOG + AT + AT A detailed description of the USB configuration for each mode follows: +GTUSBMODE: 31 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=124 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d10 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=400mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 32 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=122 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d11 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 33 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=126 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d11 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=4096ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Sven Schwermer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1364ce7f0abf0..cd82dd44f72d3 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2123,6 +2123,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ From 07989eb981d862f7f2be68d233d753f2e7ccc119 Mon Sep 17 00:00:00 2001 From: Sven Schwermer Date: Mon, 25 Apr 2022 16:34:50 +0200 Subject: [PATCH 199/491] USB: serial: option: add Fibocom MA510 modem The MA510 modem has 3 USB configurations that are configurable via the AT command AT+GTUSBMODE={30,31,32} which make the modem enumerate with the following interfaces, respectively: 30: Diag + QDSS + Modem + RMNET 31: Diag + Modem + AT + ECM 32: Modem + AT + ECM The first configuration (30) reuses u-blox R410M's VID/PID with identical interface configuration. A detailed description of the USB configuration for each mode follows: +GTUSBMODE: 30 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#= 19 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=90b2 Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 31 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#= 99 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0106 Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 3 IfCount= 2 Cls=02(comm.) Sub=00 Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 4 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 4 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 32 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=100 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=010a Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 2 IfCount= 2 Cls=02(comm.) Sub=00 Prot=00 I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 3 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Sven Schwermer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index cd82dd44f72d3..152ad882657d7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2129,6 +2129,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ From 171865dab096da1ab980a32eeea5d1b88cd7bc50 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 21 Apr 2022 18:42:28 +0900 Subject: [PATCH 200/491] gpio: visconti: Fix fwnode of GPIO IRQ The fwnode of GPIO IRQ must be set to its own fwnode, not the fwnode of the parent IRQ. Therefore, this sets own fwnode instead of the parent IRQ fwnode to GPIO IRQ's. Fixes: 2ad74f40dacc ("gpio: visconti: Add Toshiba Visconti GPIO support") Signed-off-by: Nobuhiro Iwamatsu Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-visconti.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-visconti.c b/drivers/gpio/gpio-visconti.c index 47455810bdb91..e6534ea1eaa7a 100644 --- a/drivers/gpio/gpio-visconti.c +++ b/drivers/gpio/gpio-visconti.c @@ -130,7 +130,6 @@ static int visconti_gpio_probe(struct platform_device *pdev) struct gpio_irq_chip *girq; struct irq_domain *parent; struct device_node *irq_parent; - struct fwnode_handle *fwnode; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -150,14 +149,12 @@ static int visconti_gpio_probe(struct platform_device *pdev) } parent = irq_find_host(irq_parent); + of_node_put(irq_parent); if (!parent) { dev_err(dev, "No IRQ parent domain\n"); return -ENODEV; } - fwnode = of_node_to_fwnode(irq_parent); - of_node_put(irq_parent); - ret = bgpio_init(&priv->gpio_chip, dev, 4, priv->base + GPIO_IDATA, priv->base + GPIO_OSET, @@ -180,7 +177,7 @@ static int visconti_gpio_probe(struct platform_device *pdev) girq = &priv->gpio_chip.irq; girq->chip = irq_chip; - girq->fwnode = fwnode; + girq->fwnode = of_node_to_fwnode(dev->of_node); girq->parent_domain = parent; girq->child_to_parent_hwirq = visconti_gpio_child_to_parent_hwirq; girq->populate_parent_alloc_arg = visconti_gpio_populate_parent_fwspec; From 870b1eee2d844727b06e238c121d260bc5645580 Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Mon, 25 Apr 2022 13:58:40 +0800 Subject: [PATCH 201/491] USB: serial: qcserial: add support for Sierra Wireless EM7590 Add support for Sierra Wireless EM7590 0xc080/0xc081 compositions. Signed-off-by: Ethan Yang Link: https://lore.kernel.org/r/20220425055840.5693-1-etyang@sierrawireless.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index c18bf8164bc2e..586ef5551e76e 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ + {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */ + {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From 2685027fca387b602ae565bff17895188b803988 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 27 Apr 2022 10:54:28 -0400 Subject: [PATCH 202/491] cgroup/cpuset: Remove cpus_allowed/mems_allowed setup in cpuset_init_smp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 3 places where the cpu and node masks of the top cpuset can be initialized in the order they are executed: 1) start_kernel -> cpuset_init() 2) start_kernel -> cgroup_init() -> cpuset_bind() 3) kernel_init_freeable() -> do_basic_setup() -> cpuset_init_smp() The first cpuset_init() call just sets all the bits in the masks. The second cpuset_bind() call sets cpus_allowed and mems_allowed to the default v2 values. The third cpuset_init_smp() call sets them back to v1 values. For systems with cgroup v2 setup, cpuset_bind() is called once. As a result, cpu and memory node hot add may fail to update the cpu and node masks of the top cpuset to include the newly added cpu or node in a cgroup v2 environment. For systems with cgroup v1 setup, cpuset_bind() is called again by rebind_subsystem() when the v1 cpuset filesystem is mounted as shown in the dmesg log below with an instrumented kernel. [ 2.609781] cpuset_bind() called - v2 = 1 [ 3.079473] cpuset_init_smp() called [ 7.103710] cpuset_bind() called - v2 = 0 smp_init() is called after the first two init functions. So we don't have a complete list of active cpus and memory nodes until later in cpuset_init_smp() which is the right time to set up effective_cpus and effective_mems. To fix this cgroup v2 mask setup problem, the potentially incorrect cpus_allowed & mems_allowed setting in cpuset_init_smp() are removed. For cgroup v2 systems, the initial cpuset_bind() call will set the masks correctly. For cgroup v1 systems, the second call to cpuset_bind() will do the right setup. cc: stable@vger.kernel.org Signed-off-by: Waiman Long Tested-by: Feng Tang Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 9390bfd9f1cd3..71a418858a5e0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = { */ void __init cpuset_init_smp(void) { - cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); - top_cpuset.mems_allowed = node_states[N_MEMORY]; + /* + * cpus_allowd/mems_allowed set to v2 values in the initial + * cpuset_bind() call will be reset to v1 values in another + * cpuset_bind() call when v1 cpuset is mounted. + */ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); From 9f73f1aef98b2fa7252c0a89be64840271ce8ea0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 1 Apr 2022 15:29:37 +0800 Subject: [PATCH 203/491] btrfs: force v2 space cache usage for subpage mount [BUG] For a 4K sector sized btrfs with v1 cache enabled and only mounted on systems with 4K page size, if it's mounted on subpage (64K page size) systems, it can cause the following warning on v1 space cache: BTRFS error (device dm-1): csum mismatch on free space cache BTRFS warning (device dm-1): failed to load free space cache for block group 84082688, rebuilding it now Although not a big deal, as kernel can rebuild it without problem, such warning will bother end users, especially if they want to switch the same btrfs seamlessly between different page sized systems. [CAUSE] V1 free space cache is still using fixed PAGE_SIZE for various bitmap, like BITS_PER_BITMAP. Such hard-coded PAGE_SIZE usage will cause various mismatch, from v1 cache size to checksum. Thus kernel will always reject v1 cache with a different PAGE_SIZE with csum mismatch. [FIX] Although we should fix v1 cache, it's already going to be marked deprecated soon. And we have v2 cache based on metadata (which is already fully subpage compatible), and it has almost everything superior than v1 cache. So just force subpage mount to use v2 cache on mount. Reported-by: Matt Corallo CC: stable@vger.kernel.org # 5.15+ Link: https://lore.kernel.org/linux-btrfs/61aa27d1-30fc-c1a9-f0f4-9df544395ec3@bluematt.me/ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20e70eb88465c..3e0acc3622338 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3657,6 +3657,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info; + /* + * V1 space cache has some hardcoded PAGE_SIZE usage, and is + * going to be deprecated. + * + * Force to use v2 cache for subpage case. + */ + btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); + btrfs_set_and_info(fs_info, FREE_SPACE_TREE, + "forcing free space tree for sector size %u with page size %lu", + sectorsize, PAGE_SIZE); + btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); From 549577127afeb759c29cdeeff1bdccd86e6c0dbf Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 3 May 2022 14:10:04 -0700 Subject: [PATCH 204/491] btrfs: zoned: move non-changing condition check out of the loop btrfs_zone_activate() checks if block_group->alloc_offset == block_group->zone_capacity every time it iterates the loop. But, it is not depending on the index. Move out the check and do it only once. Fixes: f9a912a3c45f ("btrfs: zoned: make zone activation multi stripe capable") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1b1b310c3c510..b5eb794c1e23c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1835,6 +1835,12 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } + /* No space left */ + if (block_group->alloc_offset == block_group->zone_capacity) { + ret = false; + goto out_unlock; + } + for (i = 0; i < map->num_stripes; i++) { device = map->stripes[i].dev; physical = map->stripes[i].physical; @@ -1842,12 +1848,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) if (device->zone_info->max_active_zones == 0) continue; - /* No space left */ - if (block_group->alloc_offset == block_group->zone_capacity) { - ret = false; - goto out_unlock; - } - if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; From ceb4f60830a7cd38ff47b7dd54e9e06ddbaf413c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 3 May 2022 14:10:05 -0700 Subject: [PATCH 205/491] btrfs: zoned: activate block group properly on unlimited active zone device btrfs_zone_activate() checks if it activated all the underlying zones in the loop. However, that check never hit on an unlimited activate zone device (max_active_zones == 0). Fortunately, it still works without ENOSPC because btrfs_zone_activate() returns true in the end, even if block_group->zone_is_active == 0. But, it is confusing to have non zone_is_active block group still usable for allocation. Also, we are wasting CPU time to iterate the loop every time btrfs_zone_activate() is called for the blog groups. Since error case in the loop is handled by out_unlock, we can just set zone_is_active and do the list stuff after the loop. Fixes: f9a912a3c45f ("btrfs: zoned: make zone activation multi stripe capable") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5eb794c1e23c..d31b0eda210f1 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1853,24 +1853,18 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) ret = false; goto out_unlock; } - - /* Successfully activated all the zones */ - if (i == map->num_stripes - 1) - block_group->zone_is_active = 1; - - } + + /* Successfully activated all the zones */ + block_group->zone_is_active = 1; spin_unlock(&block_group->lock); - if (block_group->zone_is_active) { - /* For the active block group list */ - btrfs_get_block_group(block_group); + /* For the active block group list */ + btrfs_get_block_group(block_group); - spin_lock(&fs_info->zone_active_bgs_lock); - list_add_tail(&block_group->active_bg_list, - &fs_info->zone_active_bgs); - spin_unlock(&fs_info->zone_active_bgs_lock); - } + spin_lock(&fs_info->zone_active_bgs_lock); + list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); return true; From 750ee454908e90a8792b1e2b157c2948da86e926 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 May 2022 11:57:02 +0100 Subject: [PATCH 206/491] btrfs: fix assertion failure when logging directory key range item When inserting a key range item (BTRFS_DIR_LOG_INDEX_KEY) while logging a directory, we don't expect the insertion to fail with -EEXIST, because we are holding the directory's log_mutex and we have dropped all existing BTRFS_DIR_LOG_INDEX_KEY keys from the log tree before we started to log the directory. However it's possible that during the logging we attempt to insert the same BTRFS_DIR_LOG_INDEX_KEY key twice, but for this to happen we need to race with insertions of items from other inodes in the subvolume's tree while we are logging a directory. Here's how this can happen: 1) We are logging a directory with inode number 1000 that has its items spread across 3 leaves in the subvolume's tree: leaf A - has index keys from the range 2 to 20 for example. The last item in the leaf corresponds to a dir item for index number 20. All these dir items were created in a past transaction. leaf B - has index keys from the range 22 to 100 for example. It has no keys from other inodes, all its keys are dir index keys for our directory inode number 1000. Its first key is for the dir item with a sequence number of 22. All these dir items were also created in a past transaction. leaf C - has index keys for our directory for the range 101 to 120 for example. This leaf also has items from other inodes, and its first item corresponds to the dir item for index number 101 for our directory with inode number 1000; 2) When we finish processing the items from leaf A at log_dir_items(), we log a BTRFS_DIR_LOG_INDEX_KEY key with an offset of 21 and a last offset of 21, meaning the log is authoritative for the index range from 21 to 21 (a single sequence number). At this point leaf B was not yet modified in the current transaction; 3) When we return from log_dir_items() we have released our read lock on leaf B, and have set *last_offset_ret to 21 (index number of the first item on leaf B minus 1); 4) Some other task inserts an item for other inode (inode number 1001 for example) into leaf C. That resulted in pushing some items from leaf C into leaf B, in order to make room for the new item, so now leaf B has dir index keys for the sequence number range from 22 to 102 and leaf C has the dir items for the sequence number range 103 to 120; 5) At log_directory_changes() we call log_dir_items() again, passing it a 'min_offset' / 'min_key' value of 22 (*last_offset_ret from step 3 plus 1, so 21 + 1). Then btrfs_search_forward() leaves us at slot 0 of leaf B, since leaf B was modified in the current transaction. We have also initialized 'last_old_dentry_offset' to 20 after calling btrfs_previous_item() at log_dir_items(), as it left us at the last item of leaf A, which refers to the dir item with sequence number 20; 6) We then call process_dir_items_leaf() to process the dir items of leaf B, and when we process the first item, corresponding to slot 0, sequence number 22, we notice the dir item was created in a past transaction and its sequence number is greater than the value of *last_old_dentry_offset + 1 (20 + 1), so we decide to log again a BTRFS_DIR_LOG_INDEX_KEY key with an offset of 21 and an end range of 21 (key.offset - 1 == 22 - 1 == 21), which results in an -EEXIST error from insert_dir_log_key(), as we have already inserted that key at step 2, triggering the assertion at process_dir_items_leaf(). The trace produced in dmesg is like the following: assertion failed: ret != -EEXIST, in fs/btrfs/tree-log.c:3857 [198255.980839][ T7460] ------------[ cut here ]------------ [198255.981666][ T7460] kernel BUG at fs/btrfs/ctree.h:3617! [198255.983141][ T7460] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [198255.984080][ T7460] CPU: 0 PID: 7460 Comm: repro-ghost-dir Not tainted 5.18.0-5314c78ac373-misc-next+ [198255.986027][ T7460] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [198255.988600][ T7460] RIP: 0010:assertfail.constprop.0+0x1c/0x1e [198255.989465][ T7460] Code: 8b 4c 89 (...) [198255.992599][ T7460] RSP: 0018:ffffc90007387188 EFLAGS: 00010282 [198255.993414][ T7460] RAX: 000000000000003d RBX: 0000000000000065 RCX: 0000000000000000 [198255.996056][ T7460] RDX: 0000000000000001 RSI: ffffffff8b62b180 RDI: fffff52000e70e24 [198255.997668][ T7460] RBP: ffffc90007387188 R08: 000000000000003d R09: ffff8881f0e16507 [198255.999199][ T7460] R10: ffffed103e1c2ca0 R11: 0000000000000001 R12: 00000000ffffffef [198256.000683][ T7460] R13: ffff88813befc630 R14: ffff888116c16e70 R15: ffffc90007387358 [198256.007082][ T7460] FS: 00007fc7f7c24640(0000) GS:ffff8881f0c00000(0000) knlGS:0000000000000000 [198256.009939][ T7460] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [198256.014133][ T7460] CR2: 0000560bb16d0b78 CR3: 0000000140b34005 CR4: 0000000000170ef0 [198256.015239][ T7460] Call Trace: [198256.015674][ T7460] [198256.016313][ T7460] log_dir_items.cold+0x16/0x2c [198256.018858][ T7460] ? replay_one_extent+0xbf0/0xbf0 [198256.025932][ T7460] ? release_extent_buffer+0x1d2/0x270 [198256.029658][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.031114][ T7460] ? lock_acquired+0xbe/0x660 [198256.032633][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.034386][ T7460] ? lock_release+0xcf/0x8a0 [198256.036152][ T7460] log_directory_changes+0xf9/0x170 [198256.036993][ T7460] ? log_dir_items+0xba0/0xba0 [198256.037661][ T7460] ? do_raw_write_unlock+0x7d/0xe0 [198256.038680][ T7460] btrfs_log_inode+0x233b/0x26d0 [198256.041294][ T7460] ? log_directory_changes+0x170/0x170 [198256.042864][ T7460] ? btrfs_attach_transaction_barrier+0x60/0x60 [198256.045130][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.046568][ T7460] ? lock_release+0xcf/0x8a0 [198256.047504][ T7460] ? lock_downgrade+0x420/0x420 [198256.048712][ T7460] ? ilookup5_nowait+0x81/0xa0 [198256.049747][ T7460] ? lock_downgrade+0x420/0x420 [198256.050652][ T7460] ? do_raw_spin_unlock+0xa9/0x100 [198256.051618][ T7460] ? __might_resched+0x128/0x1c0 [198256.052511][ T7460] ? __might_sleep+0x66/0xc0 [198256.053442][ T7460] ? __kasan_check_read+0x11/0x20 [198256.054251][ T7460] ? iget5_locked+0xbd/0x150 [198256.054986][ T7460] ? run_delayed_iput_locked+0x110/0x110 [198256.055929][ T7460] ? btrfs_iget+0xc7/0x150 [198256.056630][ T7460] ? btrfs_orphan_cleanup+0x4a0/0x4a0 [198256.057502][ T7460] ? free_extent_buffer+0x13/0x20 [198256.058322][ T7460] btrfs_log_inode+0x2654/0x26d0 [198256.059137][ T7460] ? log_directory_changes+0x170/0x170 [198256.060020][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.060930][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.061905][ T7460] ? lock_contended+0x770/0x770 [198256.062682][ T7460] ? btrfs_log_inode_parent+0xd04/0x1750 [198256.063582][ T7460] ? lock_downgrade+0x420/0x420 [198256.064432][ T7460] ? preempt_count_sub+0x18/0xc0 [198256.065550][ T7460] ? __mutex_lock+0x580/0xdc0 [198256.066654][ T7460] ? stack_trace_save+0x94/0xc0 [198256.068008][ T7460] ? __kasan_check_write+0x14/0x20 [198256.072149][ T7460] ? __mutex_unlock_slowpath+0x12a/0x430 [198256.073145][ T7460] ? mutex_lock_io_nested+0xcd0/0xcd0 [198256.074341][ T7460] ? wait_for_completion_io_timeout+0x20/0x20 [198256.075345][ T7460] ? lock_downgrade+0x420/0x420 [198256.076142][ T7460] ? lock_contended+0x770/0x770 [198256.076939][ T7460] ? do_raw_spin_lock+0x1c0/0x1c0 [198256.078401][ T7460] ? btrfs_sync_file+0x5e6/0xa40 [198256.080598][ T7460] btrfs_log_inode_parent+0x523/0x1750 [198256.081991][ T7460] ? wait_current_trans+0xc8/0x240 [198256.083320][ T7460] ? lock_downgrade+0x420/0x420 [198256.085450][ T7460] ? btrfs_end_log_trans+0x70/0x70 [198256.086362][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.087544][ T7460] ? lock_release+0xcf/0x8a0 [198256.088305][ T7460] ? lock_downgrade+0x420/0x420 [198256.090375][ T7460] ? dget_parent+0x8e/0x300 [198256.093538][ T7460] ? do_raw_spin_lock+0x1c0/0x1c0 [198256.094918][ T7460] ? lock_downgrade+0x420/0x420 [198256.097815][ T7460] ? do_raw_spin_unlock+0xa9/0x100 [198256.101822][ T7460] ? dget_parent+0xb7/0x300 [198256.103345][ T7460] btrfs_log_dentry_safe+0x48/0x60 [198256.105052][ T7460] btrfs_sync_file+0x629/0xa40 [198256.106829][ T7460] ? start_ordered_ops.constprop.0+0x120/0x120 [198256.109655][ T7460] ? __fget_files+0x161/0x230 [198256.110760][ T7460] vfs_fsync_range+0x6d/0x110 [198256.111923][ T7460] ? start_ordered_ops.constprop.0+0x120/0x120 [198256.113556][ T7460] __x64_sys_fsync+0x45/0x70 [198256.114323][ T7460] do_syscall_64+0x5c/0xc0 [198256.115084][ T7460] ? syscall_exit_to_user_mode+0x3b/0x50 [198256.116030][ T7460] ? do_syscall_64+0x69/0xc0 [198256.116768][ T7460] ? do_syscall_64+0x69/0xc0 [198256.117555][ T7460] ? do_syscall_64+0x69/0xc0 [198256.118324][ T7460] ? sysvec_call_function_single+0x57/0xc0 [198256.119308][ T7460] ? asm_sysvec_call_function_single+0xa/0x20 [198256.120363][ T7460] entry_SYSCALL_64_after_hwframe+0x44/0xae [198256.121334][ T7460] RIP: 0033:0x7fc7fe97b6ab [198256.122067][ T7460] Code: 0f 05 48 (...) [198256.125198][ T7460] RSP: 002b:00007fc7f7c23950 EFLAGS: 00000293 ORIG_RAX: 000000000000004a [198256.126568][ T7460] RAX: ffffffffffffffda RBX: 00007fc7f7c239f0 RCX: 00007fc7fe97b6ab [198256.127942][ T7460] RDX: 0000000000000002 RSI: 000056167536bcf0 RDI: 0000000000000004 [198256.129302][ T7460] RBP: 0000000000000004 R08: 0000000000000000 R09: 000000007ffffeb8 [198256.130670][ T7460] R10: 00000000000001ff R11: 0000000000000293 R12: 0000000000000001 [198256.132046][ T7460] R13: 0000561674ca8140 R14: 00007fc7f7c239d0 R15: 000056167536dab8 [198256.133403][ T7460] Fix this by treating -EEXIST as expected at insert_dir_log_key() and have it update the item with an end offset corresponding to the maximum between the previously logged end offset and the new requested end offset. The end offsets may be different due to dir index key deletions that happened as part of unlink operations while we are logging a directory (triggered when fsyncing some other inode parented by the directory) or during renames which always attempt to log a single dir index deletion. Reported-by: Zygo Blaxell Link: https://lore.kernel.org/linux-btrfs/YmyefE9mc2xl5ZMz@hungrycats.org/ Fixes: 732d591a5d6c12 ("btrfs: stop copying old dir items when logging a directory") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 11399c8eed87d..e65633686378c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3721,11 +3721,29 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, key.offset = first_offset; key.type = BTRFS_DIR_LOG_INDEX_KEY; ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); - if (ret) + /* + * -EEXIST is fine and can happen sporadically when we are logging a + * directory and have concurrent insertions in the subvolume's tree for + * items from other inodes and that result in pushing off some dir items + * from one leaf to another in order to accommodate for the new items. + * This results in logging the same dir index range key. + */ + if (ret && ret != -EEXIST) return ret; item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_dir_log_item); + if (ret == -EEXIST) { + const u64 curr_end = btrfs_dir_log_end(path->nodes[0], item); + + /* + * btrfs_del_dir_entries_in_log() might have been called during + * an unlink between the initial insertion of this key and the + * current update, or we might be logging a single entry deletion + * during a rename, so set the new last_offset to the max value. + */ + last_offset = max(last_offset, curr_end); + } btrfs_set_dir_log_end(path->nodes[0], item, last_offset); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(path); @@ -3849,13 +3867,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, ret = insert_dir_log_key(trans, log, dst_path, ino, *last_old_dentry_offset + 1, key.offset - 1); - /* - * -EEXIST should never happen because when we - * log a directory in full mode (LOG_INODE_ALL) - * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from - * the log tree. - */ - ASSERT(ret != -EEXIST); if (ret < 0) return ret; } @@ -7031,12 +7042,12 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, /* * Other concurrent task might be logging the old directory, * as it can be triggered when logging other inode that had or - * still has a dentry in the old directory. So take the old - * directory's log_mutex to prevent getting an -EEXIST when - * logging a key to record the deletion, or having that other - * task logging the old directory get an -EEXIST if it attempts - * to log the same key after we just did it. In both cases that - * would result in falling back to a transaction commit. + * still has a dentry in the old directory. We lock the old + * directory's log_mutex to ensure the deletion of the old + * name is persisted, because during directory logging we + * delete all BTRFS_DIR_LOG_INDEX_KEY keys and the deletion of + * the old name's dir index item is in the delayed items, so + * it could be missed by an in progress directory logging. */ mutex_lock(&old_dir->log_mutex); ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), From 3e1ad196385c65c1454aceab1226d9a4baca27d5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 3 May 2022 17:35:25 +0200 Subject: [PATCH 207/491] btrfs: sysfs: export the balance paused state of exclusive operation The new state allowing device addition with paused balance is not exported to user space so it can't recognize it and actually start the operation. Fixes: efc0e69c2fea ("btrfs: introduce exclusive operation BALANCE_PAUSED state") CC: stable@vger.kernel.org # 5.17 Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 17389a42a3ab7..ba78ca5aabbb2 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -922,6 +922,9 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj, case BTRFS_EXCLOP_BALANCE: str = "balance\n"; break; + case BTRFS_EXCLOP_BALANCE_PAUSED: + str = "balance paused\n"; + break; case BTRFS_EXCLOP_DEV_ADD: str = "device add\n"; break; From 01e01f5c89773c600a9f0b32c888de0146066c3a Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Sun, 1 May 2022 20:58:28 +0300 Subject: [PATCH 208/491] usb: cdc-wdm: fix reading stuck on device close cdc-wdm tracks whether a response reading request is in-progress and blocks the next request from being sent until the previous request is completed. As soon as last user closes the cdc-wdm device file, the driver cancels any ongoing requests, resets the pending response counter, but leaves the response reading in-progress flag (WDM_RESPONDING) untouched. So if the user closes the device file during the response receive request is being performed, no more data will be obtained from the modem. The request will be cancelled, effectively preventing the WDM_RESPONDING flag from being reseted. Keeping the flag set will prevent a new response receive request from being sent, permanently blocking the read path. The read path will staying blocked until the module will be reloaded or till the modem will be re-attached. This stuck has been observed with a Huawei E3372 modem attached to an OpenWrt router and using the comgt utility to set up a network connection. Fix this issue by clearing the WDM_RESPONDING flag on the device file close. Without this fix, the device reading stuck can be easily reproduced in a few connection establishing attempts. With this fix, a load test for modem connection re-establishing worked for several hours without any issues. Fixes: 922a5eadd5a3 ("usb: cdc-wdm: Fix race between autosuspend and reading from the device") Signed-off-by: Sergey Ryazanov Cc: stable Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/20220501175828.8185-1-ryazanov.s.a@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 7f2c83f299d32..eebe782380fb9 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file) poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; + clear_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); unpoison_urbs(desc); From bbc126ae381cf0a27822c1f822d0aeed74cc40d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 2 May 2022 10:04:56 +0200 Subject: [PATCH 209/491] usb: typec: tcpci: Don't skip cleanup in .remove() on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error value in an i2c remove callback results in an error message being emitted by the i2c core, but otherwise it doesn't make a difference. The device goes away anyhow and the devm cleanups are called. In this case the remove callback even returns early without stopping the tcpm worker thread and various timers. A work scheduled on the work queue, or a firing timer after tcpci_remove() returned probably results in a use-after-free situation because the regmap and driver data were freed. So better make sure that tcpci_unregister_port() is called even if disabling the irq failed. Also emit a more specific error message instead of the i2c core's "remove failed (EIO), will be ignored" and return 0 to suppress the core's warning. This patch is (also) a preparation for making i2c remove callbacks return void. Fixes: 3ba76256fc4e ("usb: typec: tcpci: mask event interrupts when remove driver") Signed-off-by: Uwe Kleine-König Cc: stable Acked-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220502080456.21568-1-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index e07d26a3cd8e1..f33e08eb76709 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -877,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client) /* Disable chip interrupts before unregistering port */ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); if (err < 0) - return err; + dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err)); tcpci_unregister_port(chip->tcpci); From b81ac4395bbeaf36e078dea1a48c02dd97b76235 Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Tue, 3 May 2022 15:10:38 -0500 Subject: [PATCH 210/491] usb: gadget: uvc: allow for application to cleanly shutdown Several types of kernel panics can occur due to timing during the uvc gadget removal. This appears to be a problem with gadget resources being managed by both the client application's v4l2 open/close and the UDC gadget bind/unbind. Since the concept of USB_GADGET_DELAYED_STATUS doesn't exist for unbind, add a wait to allow for the application to close out. Some examples of the panics that can occur are: <1>[ 1147.652313] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028 <4>[ 1147.652510] Call trace: <4>[ 1147.652514] usb_gadget_disconnect+0x74/0x1f0 <4>[ 1147.652516] usb_gadget_deactivate+0x38/0x168 <4>[ 1147.652520] usb_function_deactivate+0x54/0x90 <4>[ 1147.652524] uvc_function_disconnect+0x14/0x38 <4>[ 1147.652527] uvc_v4l2_release+0x34/0xa0 <4>[ 1147.652537] __fput+0xdc/0x2c0 <4>[ 1147.652540] ____fput+0x10/0x1c <4>[ 1147.652545] task_work_run+0xe4/0x12c <4>[ 1147.652549] do_notify_resume+0x108/0x168 <1>[ 282.950561][ T1472] Unable to handle kernel NULL pointer dereference at virtual address 00000000000005b8 <6>[ 282.953111][ T1472] Call trace: <6>[ 282.953121][ T1472] usb_function_deactivate+0x54/0xd4 <6>[ 282.953134][ T1472] uvc_v4l2_release+0xac/0x1e4 <6>[ 282.953145][ T1472] v4l2_release+0x134/0x1f0 <6>[ 282.953167][ T1472] __fput+0xf4/0x428 <6>[ 282.953178][ T1472] ____fput+0x14/0x24 <6>[ 282.953193][ T1472] task_work_run+0xac/0x130 <3>[ 213.410077][ T29] configfs-gadget gadget: uvc: Failed to queue request (-108). <1>[ 213.410116][ T29] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000003 <6>[ 213.413460][ T29] Call trace: <6>[ 213.413474][ T29] uvcg_video_pump+0x1f0/0x384 <6>[ 213.413489][ T29] process_one_work+0x2a4/0x544 <6>[ 213.413502][ T29] worker_thread+0x350/0x784 <6>[ 213.413515][ T29] kthread+0x2ac/0x320 <6>[ 213.413528][ T29] ret_from_fork+0x10/0x30 Signed-off-by: Dan Vacura Cc: stable Link: https://lore.kernel.org/r/20220503201039.71720-1-w36195@motorola.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 25 +++++++++++++++++++++++++ drivers/usb/gadget/function/uvc.h | 2 ++ drivers/usb/gadget/function/uvc_v4l2.c | 3 ++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 71bb5e477dbad..d37965867b230 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -890,13 +890,37 @@ static void uvc_function_unbind(struct usb_configuration *c, { struct usb_composite_dev *cdev = c->cdev; struct uvc_device *uvc = to_uvc(f); + long wait_ret = 1; uvcg_info(f, "%s()\n", __func__); + /* If we know we're connected via v4l2, then there should be a cleanup + * of the device from userspace either via UVC_EVENT_DISCONNECT or + * though the video device removal uevent. Allow some time for the + * application to close out before things get deleted. + */ + if (uvc->func_connected) { + uvcg_dbg(f, "waiting for clean disconnect\n"); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(500)); + uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); + } + device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); + if (uvc->func_connected) { + /* Wait for the release to occur to ensure there are no longer any + * pending operations that may cause panics when resources are cleaned + * up. + */ + uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(1000)); + uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret); + } + usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); @@ -915,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) mutex_init(&uvc->video.mutex); uvc->state = UVC_STATE_DISCONNECTED; + init_waitqueue_head(&uvc->func_connected_queue); opts = fi_to_f_uvc_opts(fi); mutex_lock(&opts->lock); diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index c3607a32b9862..886103a1fe9b7 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ struct uvc_device { struct usb_function func; struct uvc_video video; bool func_connected; + wait_queue_head_t func_connected_queue; /* Descriptors */ struct { diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index a2c78690c5c28..fd8f73bb726dd 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -253,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh, static void uvc_v4l2_disable(struct uvc_device *uvc) { - uvc->func_connected = false; uvc_function_disconnect(uvc); uvcg_video_enable(&uvc->video, 0); uvcg_free_buffers(&uvc->video.queue); + uvc->func_connected = false; + wake_up_interruptible(&uvc->func_connected_queue); } static int From 447ee1516f19f534a228dda237eddb202f23e163 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 5 May 2022 20:46:21 +0800 Subject: [PATCH 211/491] tty/serial: digicolor: fix possible null-ptr-deref in digicolor_uart_probe() It will cause null-ptr-deref when using 'res', if platform_get_resource() returns NULL, so move using 'res' after devm_ioremap_resource() that will check it to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Fixes: 5930cb3511df ("serial: driver for Conexant Digicolor USART") Signed-off-by: Yang Yingliang Reviewed-by: Baruch Siach Cc: stable Link: https://lore.kernel.org/r/20220505124621.1592697-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/digicolor-usart.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c index 6d70fea76bb3e..e37a917b9dbbc 100644 --- a/drivers/tty/serial/digicolor-usart.c +++ b/drivers/tty/serial/digicolor-usart.c @@ -471,11 +471,10 @@ static int digicolor_uart_probe(struct platform_device *pdev) if (IS_ERR(uart_clk)) return PTR_ERR(uart_clk); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dp->port.mapbase = res->start; - dp->port.membase = devm_ioremap_resource(&pdev->dev, res); + dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(dp->port.membase)) return PTR_ERR(dp->port.membase); + dp->port.mapbase = res->start; irq = platform_get_irq(pdev, 0); if (irq < 0) From bb0b197aadd928f52ce6f01f0ee977f0a08cf1be Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:26 +0200 Subject: [PATCH 212/491] serial: 8250_mtk: Fix UART_EFR register address On MediaTek SoCs, the UART IP is 16550A compatible, but there are some specific quirks: we are declaring a register shift of 2, but this is only valid for the majority of the registers, as there are some that are out of the standard layout. Specifically, this driver is using definitions from serial_reg.h, where we have a UART_EFR register defined as 2: this results in a 0x8 offset, but there we have the FCR register instead. The right offset for the EFR register on MediaTek UART is at 0x98, so, following the decimal definition convention in serial_reg.h and accounting for the register left shift of two, add and use the correct register address for this IP, defined as decimal 38, so that the final calculation results in (0x26 << 2) = 0x98. Fixes: bdbd0a7f8f03 ("serial: 8250-mtk: modify baudrate setting") Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-2-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f4a0caa56f84a..cd62a5f340149 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -37,6 +37,7 @@ #define MTK_UART_IER_RTSI 0x40 /* Enable RTS Modem status interrupt */ #define MTK_UART_IER_CTSI 0x80 /* Enable CTS Modem status interrupt */ +#define MTK_UART_EFR 38 /* I/O: Extended Features Register */ #define MTK_UART_EFR_EN 0x10 /* Enable enhancement feature */ #define MTK_UART_EFR_RTS 0x40 /* Enable hardware rx flow control */ #define MTK_UART_EFR_CTS 0x80 /* Enable hardware tx flow control */ @@ -169,7 +170,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up) MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0) @@ -232,7 +233,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) int lcr = serial_in(up, UART_LCR); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); lcr = serial_in(up, UART_LCR); @@ -241,7 +242,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR); serial_out(up, MTK_UART_ESCAPE_EN, 0x00); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI | @@ -255,8 +256,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable hw flow control*/ - serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); serial_out(up, UART_LCR, lcr); @@ -270,8 +271,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable sw flow control */ - serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); serial_out(up, UART_XON1, START_CHAR(port->state->port.tty)); From 6f81fdded0d024c7d4084d434764f30bca1cd6b1 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:27 +0200 Subject: [PATCH 213/491] serial: 8250_mtk: Make sure to select the right FEATURE_SEL Set the FEATURE_SEL at probe time to make sure that BIT(0) is enabled: this guarantees that when the port is configured as AP UART, the right register layout is interpreted by the UART IP. Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-3-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index cd62a5f340149..28e36459642ce 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -54,6 +54,9 @@ #define MTK_UART_TX_TRIGGER 1 #define MTK_UART_RX_TRIGGER MTK_UART_RX_SIZE +#define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ +#define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -569,6 +572,10 @@ static int mtk8250_probe(struct platform_device *pdev) uart.dma = data->dma; #endif + /* Set AP UART new register map */ + writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase + + (MTK_UART_FEATURE_SEL << uart.port.regshift)); + /* Disable Rate Fix function */ writel(0x0, uart.port.membase + (MTK_UART_RATE_FIX << uart.port.regshift)); From e1bfdbc7daca171c74a577b3dd0b36d76bb0ffcc Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:28 +0200 Subject: [PATCH 214/491] serial: 8250_mtk: Fix register address for XON/XOFF character The XON1/XOFF1 character registers are at offset 0xa0 and 0xa8 respectively, so we cannot use the definition in serial_port.h. Fixes: bdbd0a7f8f03 ("serial: 8250-mtk: modify baudrate setting") Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-4-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 28e36459642ce..21053db93ff1e 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -57,6 +57,9 @@ #define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ #define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ +#define MTK_UART_XON1 40 /* I/O: Xon character 1 */ +#define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -278,8 +281,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); - serial_out(up, UART_XON1, START_CHAR(port->state->port.tty)); - serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty)); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI); mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI); From fd442e5ba30aaa75ea47b32149e7a3110dc20a46 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:31 +0200 Subject: [PATCH 215/491] tty: n_gsm: fix buffer over-read in gsm_dlci_data() 'len' is decreased after each octet that has its EA bit set to 0, which means that the value is encoded with additional octets. However, the final octet does not decreases 'len' which results in 'len' being one byte too long. A buffer over-read may occur in tty_insert_flip_string() as it tries to read one byte more than the passed content size of 'data'. Decrease 'len' also for the final octet which has the EA bit set to 1 to write the correct number of bytes from the internal receive buffer to the virtual tty. Fixes: 2e124b4a390c ("TTY: switch tty_flip_buffer_push") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a38b922bcbc10..9b0b435cf26ea 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1658,6 +1658,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) if (len == 0) return; } + len--; slen++; tty = tty_port_tty_get(port); if (tty) { From edd5f60c340086891fab094ad61270d6c80f9ca4 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:32 +0200 Subject: [PATCH 216/491] tty: n_gsm: fix mux activation issues in gsm_config() The current implementation activates the mux if it was restarted and opens the control channel if the mux was previously closed and we are now acting as initiator instead of responder, which is the default setting. This has two issues. 1) No mux is activated if we keep all default values and only switch to initiator. The control channel is not allocated but will be opened next which results in a NULL pointer dereference. 2) Switching the configuration after it was once configured while keeping the initiator value the same will not reopen the control channel if it was closed due to parameter incompatibilities. The mux remains dead. Fix 1) by always activating the mux if it is dead after configuration. Fix 2) by always opening the control channel after mux activation. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 9b0b435cf26ea..bcb714031d691 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2352,6 +2352,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm, static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) { + int ret = 0; int need_close = 0; int need_restart = 0; @@ -2419,10 +2420,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) * FIXME: We need to separate activation/deactivation from adding * and removing from the mux array */ - if (need_restart) - gsm_activate_mux(gsm); - if (gsm->initiator && need_close) - gsm_dlci_begin_open(gsm->dlci[0]); + if (gsm->dead) { + ret = gsm_activate_mux(gsm); + if (ret) + return ret; + if (gsm->initiator) + gsm_dlci_begin_open(gsm->dlci[0]); + } return 0; } From 9361ebfbb79fd1bc8594a487c01ad52cdaa391ea Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:33 +0200 Subject: [PATCH 217/491] tty: n_gsm: fix invalid gsmtty_write_room() result gsmtty_write() does not prevent the user to use the full fifo size of 4096 bytes as allocated in gsm_dlci_alloc(). However, gsmtty_write_room() tries to limit the return value by 'TX_SIZE' and returns a negative value if the fifo has more than 'TX_SIZE' bytes stored. This is obviously wrong as 'TX_SIZE' is defined as 512. Define 'TX_SIZE' to the fifo size and use it accordingly for allocation to keep the current behavior. Return the correct remaining size of the fifo in gsmtty_write_room() via kfifo_avail(). Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index bcb714031d691..fd8b86dde5255 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -137,6 +137,7 @@ struct gsm_dlci { int retries; /* Uplink tty if active */ struct tty_port port; /* The tty bound to this DLCI if there is one */ +#define TX_SIZE 4096 /* Must be power of 2. */ struct kfifo fifo; /* Queue fifo for the DLCI */ int adaption; /* Adaption layer in use */ int prev_adaption; @@ -1731,7 +1732,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) return NULL; spin_lock_init(&dlci->lock); mutex_init(&dlci->mutex); - if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) { + if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) { kfree(dlci); return NULL; } @@ -2976,8 +2977,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = { * Virtual tty side */ -#define TX_SIZE 512 - /** * gsm_modem_upd_via_data - send modem bits via convergence layer * @dlci: channel @@ -3217,7 +3216,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty) struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; - return TX_SIZE - kfifo_len(&dlci->fifo); + return kfifo_avail(&dlci->fifo); } static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) From 401fb66a355eb0f22096cf26864324f8e63c7d78 Mon Sep 17 00:00:00 2001 From: Indan Zupancic Date: Thu, 5 May 2022 13:47:50 +0200 Subject: [PATCH 218/491] fsl_lpuart: Don't enable interrupts too early If an irq is pending when devm_request_irq() is called, the irq handler will cause a NULL pointer access because initialisation is not done yet. Fixes: 9d7ee0e28da59 ("tty: serial: lpuart: avoid report NULL interrupt") Cc: stable Signed-off-by: Indan Zupancic Link: https://lore.kernel.org/r/20220505114750.45423-1-Indan.Zupancic@mep-info.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 87789872f4006..be12fee94db55 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2664,6 +2664,7 @@ static int lpuart_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct lpuart_port *sport; struct resource *res; + irq_handler_t handler; int ret; sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL); @@ -2741,17 +2742,11 @@ static int lpuart_probe(struct platform_device *pdev) if (lpuart_is_32(sport)) { lpuart_reg.cons = LPUART32_CONSOLE; - ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0, - DRIVER_NAME, sport); + handler = lpuart32_int; } else { lpuart_reg.cons = LPUART_CONSOLE; - ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0, - DRIVER_NAME, sport); + handler = lpuart_int; } - - if (ret) - goto failed_irq_request; - ret = uart_add_one_port(&lpuart_reg, &sport->port); if (ret) goto failed_attach_port; @@ -2773,13 +2768,18 @@ static int lpuart_probe(struct platform_device *pdev) sport->port.rs485_config(&sport->port, &sport->port.rs485); + ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0, + DRIVER_NAME, sport); + if (ret) + goto failed_irq_request; + return 0; +failed_irq_request: failed_get_rs485: failed_reset: uart_remove_one_port(&lpuart_reg, &sport->port); failed_attach_port: -failed_irq_request: lpuart_disable_clks(sport); failed_clock_enable: failed_out_of_range: From 6997fbd7a3dafa754f81d541498ace35b43246d8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 5 May 2022 10:53:53 +0900 Subject: [PATCH 219/491] net: rds: use maybe_get_net() when acquiring refcount on TCP sockets Eric Dumazet is reporting addition on 0 problem at rds_tcp_tune(), for delayed works queued in rds_wq might be invoked after a net namespace's refcount already reached 0. Since rds_tcp_exit_net() from cleanup_net() calls flush_workqueue(rds_wq), it is guaranteed that we can instead use maybe_get_net() from delayed work functions until rds_tcp_exit_net() returns. Note that I'm not convinced that all works which might access a net namespace are already queued in rds_wq by the moment rds_tcp_exit_net() calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net() will fail to wait for work functions, and kmem_cache_free() could be called from net_free() before maybe_get_net() is called from rds_tcp_tune(). Reported-by: Eric Dumazet Fixes: 3a58f13a881ed351 ("net: rds: acquire refcount on TCP sockets") Signed-off-by: Tetsuo Handa Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp Signed-off-by: Jakub Kicinski --- net/rds/tcp.c | 12 +++++++++--- net/rds/tcp.h | 2 +- net/rds/tcp_connect.c | 5 ++++- net/rds/tcp_listen.c | 5 ++++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2f638f8b7b1e7..73ee2771093d6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -487,11 +487,11 @@ struct rds_tcp_net { /* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation. */ -void rds_tcp_tune(struct socket *sock) +bool rds_tcp_tune(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct rds_tcp_net *rtn; tcp_sock_set_nodelay(sock->sk); lock_sock(sk); @@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock) * a process which created this net namespace terminated. */ if (!sk->sk_net_refcnt) { + if (!maybe_get_net(net)) { + release_sock(sk); + return false; + } sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); } + rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; @@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); + return true; } static void rds_tcp_accept_worker(struct work_struct *work) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index dc8d745d68575..f8b5930d7b343 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -49,7 +49,7 @@ struct rds_tcp_statistics { }; /* tcp.c */ -void rds_tcp_tune(struct socket *sock); +bool rds_tcp_tune(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5461d77fff4f4..f0c477c5d1db4 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) if (ret < 0) goto out; - rds_tcp_tune(sock); + if (!rds_tcp_tune(sock)) { + ret = -EINVAL; + goto out; + } if (isv6) { sin6.sin6_family = AF_INET6; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 09cadd556d1e1..7edf2e69d3fed 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock) __module_get(new_sock->ops->owner); rds_tcp_keepalive(new_sock); - rds_tcp_tune(new_sock); + if (!rds_tcp_tune(new_sock)) { + ret = -EINVAL; + goto out; + } inet = inet_sk(new_sock->sk); From e333eed63a091a09bd0db191b7710c594c6e995b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 May 2022 11:31:03 -0300 Subject: [PATCH 220/491] net: phy: micrel: Do not use kszphy_suspend/resume for KSZ8061 Since commit f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") the following NULL pointer dereference is observed on a board with KSZ8061: # udhcpc -i eth0 udhcpc: started, v1.35.0 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = f73cef4e [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 196 Comm: ifconfig Not tainted 5.15.37-dirty #94 Hardware name: Freescale i.MX6 SoloX (Device Tree) PC is at kszphy_config_reset+0x10/0x114 LR is at kszphy_resume+0x24/0x64 ... The KSZ8061 phy_driver structure does not have the .probe/..driver_data fields, which means that priv is not allocated. This causes the NULL pointer dereference inside kszphy_config_reset(). Fix the problem by using the generic suspend/resume functions as before. Another alternative would be to provide the .probe and .driver_data information into the structure, but to be on the safe side, let's just restore Ethernet functionality by using the generic suspend/resume. Cc: stable@vger.kernel.org Fixes: f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220504143104.1286960-1-festevam@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fc53b71dc872b..7c243cedde9f8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2782,8 +2782,8 @@ static struct phy_driver ksphy_driver[] = { .config_init = ksz8061_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, - .suspend = kszphy_suspend, - .resume = kszphy_resume, + .suspend = genphy_suspend, + .resume = genphy_resume, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, From 15f03ffe4bb951e982457f44b6cf6b06ef4cbb93 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 May 2022 11:31:04 -0300 Subject: [PATCH 221/491] net: phy: micrel: Pass .probe for KS8737 Since commit f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") the kszphy_suspend/ resume hooks are used. These functions require the probe function to be called so that priv can be allocated. Otherwise, a NULL pointer dereference happens inside kszphy_config_reset(). Cc: stable@vger.kernel.org Fixes: f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") Reported-by: Andrew Lunn Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220504143104.1286960-2-festevam@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 7c243cedde9f8..9d7dafed39318 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2657,6 +2657,7 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KS8737", /* PHY_BASIC_FEATURES */ .driver_data = &ks8737_type, + .probe = kszphy_probe, .config_init = kszphy_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, From e1a7ac6f3ba6e157adcd0ca94d92a401f1943f56 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:38 +0200 Subject: [PATCH 222/491] ping: fix address binding wrt vrf When ping_group_range is updated, 'ping' uses the DGRAM ICMP socket, instead of an IP raw socket. In this case, 'ping' is unable to bind its socket to a local address owned by a vrflite. Before the patch: $ sysctl -w net.ipv4.ping_group_range='0 2147483647' $ ip link add blue type vrf table 10 $ ip link add foo type dummy $ ip link set foo master blue $ ip link set foo up $ ip addr add 192.168.1.1/24 dev foo $ ip addr add 2001::1/64 dev foo $ ip vrf exec blue ping -c1 -I 192.168.1.1 192.168.1.2 ping: bind: Cannot assign requested address $ ip vrf exec blue ping6 -c1 -I 2001::1 2001::2 ping6: bind icmp socket: Cannot assign requested address CC: stable@vger.kernel.org Fixes: 1b69c6d0ae90 ("net: Introduce L3 Master device abstraction") Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b883..aa9a11b20d18e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int chk_addr_ret; if (addr_len < sizeof(*addr)) @@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; + chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk), addr->sin_addr.s_addr, @@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -ENODEV; } } + + if (!dev && sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, scoped); rcu_read_unlock(); From e71b7f1f44d3d88c677769c85ef0171caf9fc89f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:39 +0200 Subject: [PATCH 223/491] selftests: add ping test with ping_group_range tuned The 'ping' utility is able to manage two kind of sockets (raw or icmp), depending on the sysctl ping_group_range. By default, ping_group_range is set to '1 0', which forces ping to use an ip raw socket. Let's replay the ping tests by allowing 'ping' to use the ip icmp socket. After the previous patch, ipv4 tests results are the same with both kinds of socket. For ipv6, there are a lot a new failures (the previous patch fixes only two cases). Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 47c4d4b4a44af..54701c8b0cd70 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -810,10 +810,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -2348,10 +2354,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ From 85db6352fc8a158a893151baa1716463d34a20d0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 4 May 2022 11:09:14 +0300 Subject: [PATCH 224/491] net: Fix features skip in for_each_netdev_feature() The find_next_netdev_feature() macro gets the "remaining length", not bit index. Passing "bit - 1" for the following iteration is wrong as it skips the adjacent bit. Pass "bit" instead. Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Link: https://lore.kernel.org/r/20220504080914.1918-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2c6b9e4162254..7c2d77d75a888 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -169,7 +169,7 @@ enum { #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) -/* Finds the next feature with the highest number of the range of start till 0. +/* Finds the next feature with the highest number of the range of start-1 till 0. */ static inline int find_next_netdev_feature(u64 feature, unsigned long start) { @@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) for ((bit) = find_next_netdev_feature((mask_addr), \ NETDEV_FEATURE_COUNT); \ (bit) >= 0; \ - (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) + (bit) = find_next_netdev_feature((mask_addr), (bit))) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ From 4e707344e18525b4edf5c2bc2e3eb60692e8c92e Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Wed, 4 May 2022 14:59:08 -0400 Subject: [PATCH 225/491] MAINTAINERS: add missing files for bonding definition The bonding entry did not include additional include files that have been added nor did it reference the documentation. Add these references for completeness. Signed-off-by: Jonathan Toppins Link: https://lore.kernel.org/r/903ed2906b93628b38a2015664a20d2802042863.1651690748.git.jtoppins@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e85..15037bd1ce484 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3571,8 +3571,9 @@ M: Andy Gospodarek L: netdev@vger.kernel.org S: Supported W: http://sourceforge.net/projects/bonding/ +F: Documentation/networking/bonding.rst F: drivers/net/bonding/ -F: include/net/bonding.h +F: include/net/bond* F: include/uapi/linux/if_bonding.h BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER From 5b53a405e4658580e1faf7c217db3f55a21ba849 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 5 May 2022 16:17:29 +0200 Subject: [PATCH 226/491] s390/dasd: fix data corruption for ESE devices For ESE devices we get an error when accessing an unformatted track. The handling of this error will return zero data for read requests and format the track on demand before writing to it. To do this the code needs to distinguish between read and write requests. This is done with data from the blocklayer request. A pointer to the blocklayer request is stored in the CQR. If there is an error on the device an ERP request is built to do error recovery. While the ERP request is mostly a copy of the original CQR the pointer to the blocklayer request is not copied to not accidentally pass it back to the blocklayer without cleanup. This leads to the error that during ESE handling after an ERP request was built it is not possible to determine the IO direction. This leads to the formatting of a track for read requests which might in turn lead to data corruption. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20220505141733.1989450-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 8 +++++++- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_int.h | 12 ++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8e87a31e329d0..76d13c5ff205c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1639,6 +1639,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, unsigned long now; int nrf_suppressed = 0; int fp_suppressed = 0; + struct request *req; u8 *sense = NULL; int expires; @@ -1739,7 +1740,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } if (dasd_ese_needs_format(cqr->block, irb)) { - if (rq_data_dir((struct request *)cqr->callback_data) == READ) { + req = dasd_get_callback_data(cqr); + if (!req) { + cqr->status = DASD_CQR_ERROR; + return; + } + if (rq_data_dir(req) == READ) { device->discipline->ese_read(cqr, irb); cqr->status = DASD_CQR_SUCCESS; cqr->stopclk = now; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8410a25a65c13..e3583502aca23 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3145,7 +3145,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, sector_t curr_trk; int rc; - req = cqr->callback_data; + req = dasd_get_callback_data(cqr); block = cqr->block; base = block->base; private = base->private; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3b7af00a7825f..07f9670ea61e7 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -756,6 +756,18 @@ dasd_check_blocksize(int bsize) return 0; } +/* + * return the callback data of the original request in case there are + * ERP requests build on top of it + */ +static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr) +{ + while (cqr->refers) + cqr = cqr->refers; + + return cqr->callback_data; +} + /* externals in dasd.c */ #define DASD_PROFILE_OFF 0 #define DASD_PROFILE_ON 1 From 71f3871657370dbbaf942a1c758f64e49a36c70f Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 5 May 2022 16:17:30 +0200 Subject: [PATCH 227/491] s390/dasd: prevent double format of tracks for ESE devices For ESE devices we get an error for write operations on an unformatted track. Afterwards the track will be formatted and the IO operation restarted. When using alias devices a track might be accessed by multiple requests simultaneously and there is a race window that a track gets formatted twice resulting in data loss. Prevent this by remembering the amount of formatted tracks when starting a request and comparing this number before actually formatting a track on the fly. If the number has changed there is a chance that the current track was finally formatted in between. As a result do not format the track and restart the current IO to check. The number of formatted tracks does not match the overall number of formatted tracks on the device and it might wrap around but this is no problem. It is only needed to recognize that a track has been formatted at all in between. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20220505141733.1989450-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 7 +++++++ drivers/s390/block/dasd_eckd.c | 19 +++++++++++++++++-- drivers/s390/block/dasd_int.h | 2 ++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 76d13c5ff205c..d62a4c673962c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1422,6 +1422,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) if (!cqr->lpm) cqr->lpm = dasd_path_get_opm(device); } + /* + * remember the amount of formatted tracks to prevent double format on + * ESE devices + */ + if (cqr->block) + cqr->trkcount = atomic_read(&cqr->block->trkcount); + if (cqr->cpmode == 1) { rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, (long) cqr, cqr->lpm); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e3583502aca23..649eba51e0485 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3083,13 +3083,24 @@ static int dasd_eckd_format_device(struct dasd_device *base, } static bool test_and_set_format_track(struct dasd_format_entry *to_format, - struct dasd_block *block) + struct dasd_ccw_req *cqr) { + struct dasd_block *block = cqr->block; struct dasd_format_entry *format; unsigned long flags; bool rc = false; spin_lock_irqsave(&block->format_lock, flags); + if (cqr->trkcount != atomic_read(&block->trkcount)) { + /* + * The number of formatted tracks has changed after request + * start and we can not tell if the current track was involved. + * To avoid data corruption treat it as if the current track is + * involved + */ + rc = true; + goto out; + } list_for_each_entry(format, &block->format_list, list) { if (format->track == to_format->track) { rc = true; @@ -3109,6 +3120,7 @@ static void clear_format_track(struct dasd_format_entry *format, unsigned long flags; spin_lock_irqsave(&block->format_lock, flags); + atomic_inc(&block->trkcount); list_del_init(&format->list); spin_unlock_irqrestore(&block->format_lock, flags); } @@ -3170,8 +3182,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, } format->track = curr_trk; /* test if track is already in formatting by another thread */ - if (test_and_set_format_track(format, block)) + if (test_and_set_format_track(format, cqr)) { + /* this is no real error so do not count down retries */ + cqr->retries++; return ERR_PTR(-EEXIST); + } fdata.start_unit = curr_trk; fdata.stop_unit = curr_trk; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 07f9670ea61e7..83b918b84b4ae 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -187,6 +187,7 @@ struct dasd_ccw_req { void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; unsigned int proc_bytes; /* bytes for partial completion */ + unsigned int trkcount; /* count formatted tracks */ }; /* @@ -610,6 +611,7 @@ struct dasd_block { struct list_head format_list; spinlock_t format_lock; + atomic_t trkcount; }; struct dasd_attention_data { From cd68c48ea15c85f1577a442dc4c285e112ff1b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 5 May 2022 16:17:31 +0200 Subject: [PATCH 228/491] s390/dasd: Fix read for ESE with blksize < 4k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading unformatted tracks on ESE devices, the corresponding memory areas are simply set to zero for each segment. This is done incorrectly for blocksizes < 4096. There are two problems. First, the increment of dst is done using the counter of the loop (off), which is increased by blksize every iteration. This leads to a much bigger increment for dst as actually intended. Second, the increment of dst is done before the memory area is set to 0, skipping a significant amount of bytes of memory. This leads to illegal overwriting of memory and ultimately to a kernel panic. This is not a problem with 4k blocksize because blk_queue_max_segment_size is set to PAGE_SIZE, always resulting in a single iteration for the inner segment loop (bv.bv_len == blksize). The incorrectly used 'off' value to increment dst is 0 and the correct memory area is used. In order to fix this for blksize < 4k, increment dst correctly using the blksize and only do it at the end of the loop. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-4-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 649eba51e0485..e46461b4d8a75 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3285,12 +3285,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb) cqr->proc_bytes = blk_count * blksize; return 0; } - if (dst && !skip_block) { - dst += off; + if (dst && !skip_block) memset(dst, 0, blksize); - } else { + else skip_block--; - } + dst += blksize; blk_count++; } } From b9c10f68e23c13f56685559a0d6fdaca9f838324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 5 May 2022 16:17:32 +0200 Subject: [PATCH 229/491] s390/dasd: Fix read inconsistency for ESE DASD devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read requests that return with NRF error are partially completed in dasd_eckd_ese_read(). The function keeps track of the amount of processed bytes and the driver will eventually return this information back to the block layer for further processing via __dasd_cleanup_cqr() when the request is in the final stage of processing (from the driver's perspective). For this, blk_update_request() is used which requires the number of bytes to complete the request. As per documentation the nr_bytes parameter is described as follows: "number of bytes to complete for @req". This was mistakenly interpreted as "number of bytes _left_ for @req" leading to new requests with incorrect data length. The consequence are inconsistent and completely wrong read requests as data from random memory areas are read back. Fix this by correctly specifying the amount of bytes that should be used to complete the request. Fixes: 5e6bdd37c552 ("s390/dasd: fix data corruption for thin provisioned devices") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-5-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d62a4c673962c..ba6d787896606 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2778,8 +2778,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) * complete a request partially. */ if (proc_bytes) { - blk_update_request(req, BLK_STS_OK, - blk_rq_bytes(req) - proc_bytes); + blk_update_request(req, BLK_STS_OK, proc_bytes); blk_mq_requeue_request(req, true); } else if (likely(!blk_should_fake_timeout(req->q))) { blk_mq_complete_request(req); From f1c8781ac9d87650ccf45a354c0bbfa3f9230371 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 5 May 2022 16:17:33 +0200 Subject: [PATCH 230/491] s390/dasd: Use kzalloc instead of kmalloc/memset Use kzalloc rather than duplicating its implementation, which makes code simple and easy to understand. Signed-off-by: Haowen Bai Reviewed-by: Sven Schnelle Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-6-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e46461b4d8a75..836838f7d6867 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1480,7 +1480,7 @@ static int dasd_eckd_pe_handler(struct dasd_device *device, { struct pe_handler_work_data *data; - data = kmalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA); + data = kzalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA); if (!data) { if (mutex_trylock(&dasd_pe_handler_mutex)) { data = pe_handler_worker; @@ -1488,9 +1488,6 @@ static int dasd_eckd_pe_handler(struct dasd_device *device, } else { return -ENOMEM; } - } else { - memset(data, 0, sizeof(*data)); - data->isglobal = 0; } INIT_WORK(&data->worker, do_pe_handler_work); dasd_get_device(device); From e1846cff2fe614d93a2f89461b5935678fd34bd9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:54:59 +0300 Subject: [PATCH 231/491] net: mscc: ocelot: mark traps with a bool instead of keeping them in a list Since the blamed commit, VCAP filters can appear on more than one list. If their action is "trap", they are chained on ocelot->traps via filter->trap_list. This is in addition to their normal placement on the VCAP block->rules list head. Therefore, when we free a VCAP filter, we must remove it from all lists it is a member of, including ocelot->traps. There are at least 2 bugs which are direct consequences of this design decision. First is the incorrect usage of list_empty(), meant to denote whether "filter" is chained into ocelot->traps via filter->trap_list. This does not do the correct thing, because list_empty() checks whether "head->next == head", but in our case, head->next == head->prev == NULL. So we dereference NULL pointers and die when we call list_del(). Second is the fact that not all places that should remove the filter from ocelot->traps do so. One example is ocelot_vcap_block_remove_filter(), which is where we have the main kfree(filter). By keeping freed filters in ocelot->traps we end up in a use-after-free in felix_update_trapping_destinations(). Attempting to fix all the buggy patterns is a whack-a-mole game which makes the driver unmaintainable. Actually this is what the previous patch version attempted to do: https://patchwork.kernel.org/project/netdevbpf/patch/20220503115728.834457-3-vladimir.oltean@nxp.com/ but it introduced another set of bugs, because there are other places in which create VCAP filters, not just ocelot_vcap_filter_create(): - ocelot_trap_add() - felix_tag_8021q_vlan_add_rx() - felix_tag_8021q_vlan_add_tx() Relying on the convention that all those code paths must call INIT_LIST_HEAD(&filter->trap_list) is not going to scale. So let's do what should have been done in the first place and keep a bool in struct ocelot_vcap_filter which denotes whether we are looking at a trapping rule or not. Iterating now happens over the main VCAP IS2 block->rules. The advantage is that we no longer risk having stale references to a freed filter, since it is only present in that list. Fixes: e42bd4ed09aa ("net: mscc: ocelot: keep traps in a list") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 7 ++++++- drivers/net/ethernet/mscc/ocelot.c | 11 +++-------- drivers/net/ethernet/mscc/ocelot_flower.c | 4 +--- include/soc/mscc/ocelot_vcap.h | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9e28219b223df..faccfb3f01583 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -403,6 +403,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); + struct ocelot_vcap_block *block_vcap_is2; struct ocelot_vcap_filter *trap; enum ocelot_mask_mode mask_mode; unsigned long port_mask; @@ -422,9 +423,13 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, /* We are sure that "cpu" was found, otherwise * dsa_tree_setup_default_cpu() would have failed earlier. */ + block_vcap_is2 = &ocelot->block[VCAP_IS2]; /* Make sure all traps are set up for that destination */ - list_for_each_entry(trap, &ocelot->traps, trap_list) { + list_for_each_entry(trap, &block_vcap_is2->rules, list) { + if (!trap->is_trap) + continue; + /* Figure out the current trapping destination */ if (using_tag_8021q) { /* Redirect to the tag_8021q CPU port. If timestamps diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ca71b62a44dc3..20ceac81a2c2c 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1622,7 +1622,7 @@ int ocelot_trap_add(struct ocelot *ocelot, int port, trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; trap->action.port_mask = 0; trap->take_ts = take_ts; - list_add_tail(&trap->trap_list, &ocelot->traps); + trap->is_trap = true; new = true; } @@ -1634,10 +1634,8 @@ int ocelot_trap_add(struct ocelot *ocelot, int port, err = ocelot_vcap_filter_replace(ocelot, trap); if (err) { trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) { - list_del(&trap->trap_list); + if (!trap->ingress_port_mask) kfree(trap); - } return err; } @@ -1657,11 +1655,8 @@ int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie) return 0; trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) { - list_del(&trap->trap_list); - + if (!trap->ingress_port_mask) return ocelot_vcap_filter_del(ocelot, trap); - } return ocelot_vcap_filter_replace(ocelot, trap); } diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 03b5e59d033e4..a9b26b3002be6 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -295,7 +295,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->action.cpu_copy_ena = true; filter->action.cpu_qu_num = 0; filter->type = OCELOT_VCAP_FILTER_OFFLOAD; - list_add_tail(&filter->trap_list, &ocelot->traps); + filter->is_trap = true; break; case FLOW_ACTION_POLICE: if (filter->block_id == PSFP_BLOCK_ID) { @@ -878,8 +878,6 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, ret = ocelot_flower_parse(ocelot, port, ingress, f, filter); if (ret) { - if (!list_empty(&filter->trap_list)) - list_del(&filter->trap_list); kfree(filter); return ret; } diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 7b2bf9b1fe697..de26c992f8214 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -681,7 +681,6 @@ struct ocelot_vcap_id { struct ocelot_vcap_filter { struct list_head list; - struct list_head trap_list; enum ocelot_vcap_filter_type type; int block_id; @@ -695,6 +694,7 @@ struct ocelot_vcap_filter { struct ocelot_vcap_stats stats; /* For VCAP IS1 and IS2 */ bool take_ts; + bool is_trap; unsigned long ingress_port_mask; /* For VCAP ES0 */ struct ocelot_vcap_port ingress_port; From 16bbebd35629c93a8c68c6d8d28557e100bcee73 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:00 +0300 Subject: [PATCH 232/491] net: mscc: ocelot: fix last VCAP IS1/IS2 filter persisting in hardware when deleted ocelot_vcap_filter_del() works by moving the next filters over the current one, and then deleting the last filter by calling vcap_entry_set() with a del_filter which was specially created by memsetting its memory to zeroes. vcap_entry_set() then programs this to the TCAM and action RAM via the cache registers. The problem is that vcap_entry_set() is a dispatch function which looks at del_filter->block_id. But since del_filter is zeroized memory, the block_id is 0, or otherwise said, VCAP_ES0. So practically, what we do is delete the entry at the same TCAM index from VCAP ES0 instead of IS1 or IS2. The code was not always like this. vcap_entry_set() used to simply be is2_entry_set(), and then, the logic used to work. Restore the functionality by populating the block_id of the del_filter based on the VCAP block of the filter that we're deleting. This makes vcap_entry_set() know what to do. Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index c8701ac955a8f..2749df593ebcb 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1250,7 +1250,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter del_filter; int i, index; + /* Need to inherit the block_id so that vcap_entry_set() + * does not get confused and knows where to install it. + */ memset(&del_filter, 0, sizeof(del_filter)); + del_filter.block_id = filter->block_id; /* Gets index of the filter */ index = ocelot_vcap_block_get_filter_index(block, filter); From 6741e11880003e35802d78cc58035057934f4dab Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:01 +0300 Subject: [PATCH 233/491] net: mscc: ocelot: fix VCAP IS2 filters matching on both lookups The VCAP IS2 TCAM is looked up twice per packet, and each filter can be configured to only match during the first, second lookup, or both, or none. The blamed commit wrote the code for making VCAP IS2 filters match only on the given lookup. But right below that code, there was another line that explicitly made the lookup a "don't care", and this is overwriting the lookup we've selected. So the code had no effect. Some of the more noticeable effects of having filters match on both lookups: - in "tc -s filter show dev swp0 ingress", we see each packet matching a VCAP IS2 filter counted twice. This throws off scripts such as tools/testing/selftests/net/forwarding/tc_actions.sh and makes them fail. - a "tc-drop" action offloaded to VCAP IS2 needs a policer as well, because once the CPU port becomes a member of the destination port mask of a packet, nothing removes it, not even a PERMIT/DENY mask mode with a port mask of 0. But VCAP IS2 rules with the POLICE_ENA bit in the action vector can only appear in the first lookup. What happens when a filter matches both lookups is that the action vector is combined, and this makes the POLICE_ENA bit ineffective, since the last lookup in which it has appeared is the second one. In other words, "tc-drop" actions do not drop packets for the CPU port, dropped packets are still seen by software unless there was an FDB entry that directed those packets to some other place different from the CPU. The last bit used to work, because in the initial commit b596229448dd ("net: mscc: ocelot: Add support for tcam"), we were writing the FIRST field of the VCAP IS2 half key with a 1, not with a "don't care". The change to "don't care" was made inadvertently by me in commit c1c3993edb7c ("net: mscc: ocelot: generalize existing code for VCAP"), which I just realized, and which needs a separate fix from this one, for "stable" kernels that lack the commit blamed below. Fixes: 226e9cd82a96 ("net: mscc: ocelot: only install TCAM entries into a specific lookup and PAG") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 2749df593ebcb..774cec3777037 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -374,7 +374,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, OCELOT_VCAP_BIT_0); vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0, ~filter->ingress_port_mask); - vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH, OCELOT_VCAP_BIT_ANY); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc); From 477d2b91623e682e9a8126ea92acb8f684969cc7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:02 +0300 Subject: [PATCH 234/491] net: mscc: ocelot: restrict tc-trap actions to VCAP IS2 lookup 0 Once the CPU port was added to the destination port mask of a packet, it can never be cleared, so even packets marked as dropped by the MASK_MODE of a VCAP IS2 filter will still reach it. This is why we need the OCELOT_POLICER_DISCARD to "kill dropped packets dead" and make software stop seeing them. We disallow policer rules from being put on any other chain than the one for the first lookup, but we don't do this for "drop" rules, although we should. This change is merely ascertaining that the rules dont't (completely) work and letting the user know. The blamed commit is the one that introduced the multi-chain architecture in ocelot. Prior to that, we should have always offloaded the filters to VCAP IS2 lookup 0, where they did work. Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_flower.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index a9b26b3002be6..51cf241ff7d07 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -280,9 +280,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; case FLOW_ACTION_TRAP: - if (filter->block_id != VCAP_IS2) { + if (filter->block_id != VCAP_IS2 || + filter->lookup != 0) { NL_SET_ERR_MSG_MOD(extack, - "Trap action can only be offloaded to VCAP IS2"); + "Trap action can only be offloaded to VCAP IS2 lookup 0"); return -EOPNOTSUPP; } if (filter->goto_target != -1) { From 93a8417088ea570b5721d2b526337a2d3aed9fa3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:03 +0300 Subject: [PATCH 235/491] net: mscc: ocelot: avoid corrupting hardware counters when moving VCAP filters Given the following order of operations: (1) we add filter A using tc-flower (2) we send a packet that matches it (3) we read the filter's statistics to find a hit count of 1 (4) we add a second filter B with a higher preference than A, and A moves one position to the right to make room in the TCAM for it (5) we send another packet, and this matches the second filter B (6) we read the filter statistics again. When this happens, the hit count of filter A is 2 and of filter B is 1, despite a single packet having matched each filter. Furthermore, in an alternate history, reading the filter stats a second time between steps (3) and (4) makes the hit count of filter A remain at 1 after step (6), as expected. The reason why this happens has to do with the filter->stats.pkts field, which is written to hardware through the call path below: vcap_entry_set / | \ / | \ / | \ / | \ es0_entry_set is1_entry_set is2_entry_set \ | / \ | / \ | / vcap_data_set(data.counter, ...) The primary role of filter->stats.pkts is to transport the filter hit counters from the last readout all the way from vcap_entry_get() -> ocelot_vcap_filter_stats_update() -> ocelot_cls_flower_stats(). The reason why vcap_entry_set() writes it to hardware is so that the counters (saturating and having a limited bit width) are cleared after each user space readout. The writing of filter->stats.pkts to hardware during the TCAM entry movement procedure is an unintentional consequence of the code design, because the hit count isn't up to date at this point. So at step (4), when filter A is moved by ocelot_vcap_filter_add() to make room for filter B, the hardware hit count is 0 (no packet matched on it in the meantime), but filter->stats.pkts is 1, because the last readout saw the earlier packet. The movement procedure programs the old hit count back to hardware, so this creates the impression to user space that more packets have been matched than they really were. The bug can be seen when running the gact_drop_and_ok_test() from the tc_actions.sh selftest. Fix the issue by reading back the hit count to tmp->stats.pkts before migrating the VCAP filter. Sure, this is a best-effort technique, since the packets that hit the rule between vcap_entry_get() and vcap_entry_set() won't be counted, but at least it allows the counters to be reliably used for selftests where the traffic is under control. The vcap_entry_get() name is a bit unintuitive, but it only reads back the counter portion of the TCAM entry, not the entire entry. The index from which we retrieve the counter is also a bit unintuitive (i - 1 during add, i + 1 during del), but this is the way in which TCAM entry movement works. The "entry index" isn't a stored integer for a TCAM filter, instead it is dynamically computed by ocelot_vcap_block_get_filter_index() based on the entry's position in the &block->rules list. That position (as well as block->count) is automatically updated by ocelot_vcap_filter_add_to_block() on add, and by ocelot_vcap_block_remove_filter() on del. So "i" is the new filter index, and "i - 1" or "i + 1" respectively are the old addresses of that TCAM entry (we only support installing/deleting one filter at a time). Fixes: b596229448dd ("net: mscc: ocelot: Add support for tcam") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 774cec3777037..eeb4cc07dd16f 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1216,6 +1216,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct ocelot_vcap_filter *tmp; tmp = ocelot_vcap_block_find_filter_by_index(block, i); + /* Read back the filter's counters before moving it */ + vcap_entry_get(ocelot, i - 1, tmp); vcap_entry_set(ocelot, i, tmp); } @@ -1268,6 +1270,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *tmp; tmp = ocelot_vcap_block_find_filter_by_index(block, i); + /* Read back the filter's counters before moving it */ + vcap_entry_get(ocelot, i + 1, tmp); vcap_entry_set(ocelot, i, tmp); } From 348c71344111d7a48892e3e52264ff11956fc196 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 5 May 2022 21:04:51 +0530 Subject: [PATCH 236/491] powerpc/papr_scm: Fix buffer overflow issue with CONFIG_FORTIFY_SOURCE With CONFIG_FORTIFY_SOURCE enabled, string functions will also perform dynamic checks for string size which can panic the kernel, like incase of overflow detection. In papr_scm, papr_scm_pmu_check_events function uses stat->stat_id with string operations, to populate the nvdimm_events_map array. Since stat_id variable is not NULL terminated, the kernel panics with CONFIG_FORTIFY_SOURCE enabled at boot time. Below are the logs of kernel panic: detected buffer overflow in __fortify_strlen ------------[ cut here ]------------ kernel BUG at lib/string_helpers.c:980! Oops: Exception in kernel mode, sig: 5 [#1] NIP [c00000000077dad0] fortify_panic+0x28/0x38 LR [c00000000077dacc] fortify_panic+0x24/0x38 Call Trace: [c0000022d77836e0] [c00000000077dacc] fortify_panic+0x24/0x38 (unreliable) [c00800000deb2660] papr_scm_pmu_check_events.constprop.0+0x118/0x220 [papr_scm] [c00800000deb2cb0] papr_scm_probe+0x288/0x62c [papr_scm] [c0000000009b46a8] platform_probe+0x98/0x150 Fix this issue by using kmemdup_nul() to copy the content of stat->stat_id directly to the nvdimm_events_map array. mpe: stat->stat_id comes from the hypervisor, not userspace, so there is no security exposure. Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220505153451.35503-1-kjain@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f58728d5f10d2..39962c9055422 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -462,7 +462,6 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu { struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; - char *statid; int index, rc, count; u32 available_events; @@ -493,14 +492,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu for (index = 0, stat = stats->scm_statistic, count = 0; index < available_events; index++, ++stat) { - statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL); - if (!statid) { + p->nvdimm_events_map[count] = kmemdup_nul(stat->stat_id, 8, GFP_KERNEL); + if (!p->nvdimm_events_map[count]) { rc = -ENOMEM; goto out_nvdimm_events_map; } - strcpy(statid, stat->stat_id); - p->nvdimm_events_map[count] = statid; count++; } p->nvdimm_events_map[count] = NULL; From 135332f34ba2662bc1e32b5c612e06a8cc41a053 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 4 May 2022 13:59:17 +0200 Subject: [PATCH 237/491] Revert "fbdev: Make fb_release() return -ENODEV if fbdev was unregistered" This reverts commit aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a. That commit attempted to fix a NULL pointer dereference, caused by the struct fb_info associated with a framebuffer device to not longer be valid when the file descriptor was closed. The issue was exposed by commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"), which added a new path that goes through the struct device removal instead of directly unregistering the fb. Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. This meant that due to this switch, the fb_info was now destroyed too early, while references still existed, while before it was simply leaked. The patch we're reverting here reinstated that leak, hence "fixed" the regression. But the proper solution is to fix the drivers to not release the fb_info too soon. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220504115917.758787-1-javierm@redhat.com --- drivers/video/fbdev/core/fbmem.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 97eb0dee411cf..a6bb0e4382167 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1434,10 +1434,7 @@ fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { - struct fb_info * const info = file_fb_info(file); - - if (!info) - return -ENODEV; + struct fb_info * const info = file->private_data; lock_fb_info(info); if (info->fbops->fb_release) From 89bfd4017e58faaf70411555e7f508495114e90b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 May 2022 00:04:13 +0200 Subject: [PATCH 238/491] fbdev: Prevent possible use-after-free in fb_release() Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. Doing that will destroy the fb_info too early, while references to it may still exist, leading to a use-after-free error. To prevent this, check the fb_info reference counter when attempting to kfree the data structure in framebuffer_release(). That will leak it but at least will prevent the mentioned error. Signed-off-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220505220413.365977-1-javierm@redhat.com --- drivers/video/fbdev/core/fbsysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index 26892940c2136..82e31a2d845e1 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info) { if (!info) return; + + if (WARN_ON(refcount_read(&info->count))) + return; + kfree(info->apertures); kfree(info); } From 666b90b3ce9e4aac1e1deba266c3a230fb3913b0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:04:56 +0200 Subject: [PATCH 239/491] fbdev: simplefb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220456.366090-1-javierm@redhat.com --- drivers/video/fbdev/simplefb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index 94fc9c6d04113..2c198561c338f 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -84,6 +84,10 @@ struct simplefb_par { static void simplefb_clocks_destroy(struct simplefb_par *par); static void simplefb_regulators_destroy(struct simplefb_par *par); +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void simplefb_destroy(struct fb_info *info) { struct simplefb_par *par = info->par; @@ -94,6 +98,8 @@ static void simplefb_destroy(struct fb_info *info) if (info->screen_base) iounmap(info->screen_base); + framebuffer_release(info); + if (mem) release_mem_region(mem->start, resource_size(mem)); } @@ -545,8 +551,8 @@ static int simplefb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* simplefb_destroy takes care of info cleanup */ unregister_framebuffer(info); - framebuffer_release(info); return 0; } From d258d00fb9c7c0cdf9d10c1ded84f10339d2d349 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:05:40 +0200 Subject: [PATCH 240/491] fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220540.366218-1-javierm@redhat.com --- drivers/video/fbdev/efifb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index ea42ba6445b2d..cfa3dc0b4eeef 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -243,6 +243,10 @@ static void efifb_show_boot_graphics(struct fb_info *info) static inline void efifb_show_boot_graphics(struct fb_info *info) {} #endif +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void efifb_destroy(struct fb_info *info) { if (efifb_pci_dev) @@ -254,6 +258,9 @@ static void efifb_destroy(struct fb_info *info) else memunmap(info->screen_base); } + + framebuffer_release(info); + if (request_mem_succeeded) release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); @@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* efifb_destroy takes care of info cleanup */ unregister_framebuffer(info); sysfs_remove_groups(&pdev->dev.kobj, efifb_groups); - framebuffer_release(info); return 0; } From b3c9a924aab61adbc29df110006aa03afe1a78ba Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:06:31 +0200 Subject: [PATCH 241/491] fbdev: vesafb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220631.366371-1-javierm@redhat.com --- drivers/video/fbdev/vesafb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c index df6de5a9dd4cd..e25e8de5ff672 100644 --- a/drivers/video/fbdev/vesafb.c +++ b/drivers/video/fbdev/vesafb.c @@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green, return err; } +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void vesafb_destroy(struct fb_info *info) { struct vesafb_par *par = info->par; @@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info) if (info->screen_base) iounmap(info->screen_base); release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); + + framebuffer_release(info); } static struct fb_ops vesafb_ops = { @@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* vesafb_destroy takes care of info cleanup */ unregister_framebuffer(info); if (((struct vesafb_par *)(info->par))->region) release_region(0x3c0, 32); - framebuffer_release(info); return 0; } From 581dd69830341d299b0c097fc366097ab497d679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Mon, 2 May 2022 10:49:52 +1000 Subject: [PATCH 242/491] firmware_loader: use kernel credentials when reading firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device drivers may decide to not load firmware when probed to avoid slowing down the boot process should the firmware filesystem not be available yet. In this case, the firmware loading request may be done when a device file associated with the driver is first accessed. The credentials of the userspace process accessing the device file may be used to validate access to the firmware files requested by the driver. Ensure that the kernel assumes the responsibility of reading the firmware. This was observed on Android for a graphic driver loading their firmware when the device file (e.g. /dev/mali0) was first opened by userspace (i.e. surfaceflinger). The security context of surfaceflinger was used to validate the access to the firmware file (e.g. /vendor/firmware/mali.bin). Previously, Android configurations were not setting up the firmware_class.path command line argument and were relying on the userspace fallback mechanism. In this case, the security context of the userspace daemon (i.e. ueventd) was consistently used to read firmware files. More Android devices are now found to set firmware_class.path which gives the kernel the opportunity to read the firmware directly (via kernel_read_file_from_path_initns). In this scenario, the current process credentials were used, even if unrelated to the loading of the firmware file. Signed-off-by: Thiébaud Weksteen Cc: # 5.10 Reviewed-by: Paul Moore Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20220502004952.3970800-1-tweek@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 94d1789a233e0..406a907a4caec 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -735,6 +735,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; + struct cred *kern_cred = NULL; + const struct cred *old_cred; bool nondirect = false; int ret; @@ -751,6 +753,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret <= 0) /* error or already assigned */ goto out; + /* + * We are about to try to access the firmware file. Because we may have been + * called by a driver when serving an unrelated request from userland, we use + * the kernel credentials to read the file. + */ + kern_cred = prepare_kernel_cred(NULL); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); /* Only full reads can support decompression, platform, and sysfs. */ @@ -776,6 +790,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device); + revert_creds(old_cred); + put_cred(kern_cred); + out: if (ret < 0) { fw_abort_batch_reqs(fw); From 8bf6e0e3c7de857b91a75aa57cecd56c10035bb2 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 27 Apr 2022 10:29:57 -0700 Subject: [PATCH 243/491] Documentation/process: Make groups alphabetical and use tabs consistently The list appears to be grouped by type (silicon, software, cloud) and mostly alphabetical within each group, with a few exceptions. Before adding to it, cleanup the list to be alphabetical within the groups, and use tabs consistently throughout the list. Signed-off-by: Darren Hart Link: https://lore.kernel.org/r/ec574b5d55584a3adda9bd31b7695193636ff136.1650995848.git.darren@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6f8f36e10e8ba..4f9efae02b6b2 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -244,10 +244,10 @@ disclosure of a particular issue, unless requested by a response team or by an involved disclosed party. The current ambassadors list: ============= ======================================================== - ARM Grant Likely AMD Tom Lendacky - IBM Z Christian Borntraeger - IBM Power Anton Blanchard + ARM Grant Likely + IBM Power Anton Blanchard + IBM Z Christian Borntraeger Intel Tony Luck Qualcomm Trilok Soni From 29ad05fd6760c55fa7ab0e1f96e5be1b66daa4fc Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 27 Apr 2022 10:29:58 -0700 Subject: [PATCH 244/491] Documentation/process: Add embargoed HW contact for Ampere Computing Add Darren Hart as Ampere Computing's ambassador for the embargoed hardware issues process. Signed-off-by: Darren Hart Link: https://lore.kernel.org/r/2e36a8e925bc958928b4afa189b2f876c392831b.1650995848.git.darren@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 4f9efae02b6b2..98d7bc868f2a3 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -245,6 +245,7 @@ an involved disclosed party. The current ambassadors list: ============= ======================================================== AMD Tom Lendacky + Ampere Darren Hart ARM Grant Likely IBM Power Anton Blanchard IBM Z Christian Borntraeger From c25d7f32e3e209462cd82e6e93e66b72dbb2308f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 28 Apr 2022 22:05:00 -0500 Subject: [PATCH 245/491] platform/x86: thinkpad_acpi: Convert btusb DMI list to quirks DMI matching in thinkpad_acpi happens local to a function meaning quirks can only match that function. Future changes to thinkpad_acpi may need to quirk other code, so change this to use a quirk infrastructure. Signed-off-by: Mario Limonciello Tested-by: Mark Pearson Link: https://lore.kernel.org/r/20220429030501.1909-2-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c568fae56db29..2820205c01fd1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -309,6 +309,15 @@ struct ibm_init_struct { struct ibm_struct *data; }; +/* DMI Quirks */ +struct quirk_entry { + bool btusb_bug; +}; + +static struct quirk_entry quirk_btusb_bug = { + .btusb_bug = true, +}; + static struct { u32 bluetooth:1; u32 hotkey:1; @@ -338,6 +347,7 @@ static struct { u32 hotkey_poll_active:1; u32 has_adaptive_kbd:1; u32 kbd_lang:1; + struct quirk_entry *quirks; } tp_features; static struct { @@ -4359,9 +4369,10 @@ static void bluetooth_exit(void) bluetooth_shutdown(); } -static const struct dmi_system_id bt_fwbug_list[] __initconst = { +static const struct dmi_system_id fwbug_list[] __initconst = { { .ident = "ThinkPad E485", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20KU"), @@ -4369,6 +4380,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad E585", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20KV"), @@ -4376,6 +4388,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A285 - 20MW", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MW"), @@ -4383,6 +4396,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A285 - 20MX", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MX"), @@ -4390,6 +4404,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A485 - 20MU", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MU"), @@ -4397,6 +4412,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A485 - 20MV", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MV"), @@ -4419,7 +4435,8 @@ static int __init have_bt_fwbug(void) * Some AMD based ThinkPads have a firmware bug that calling * "GBDC" will cause bluetooth on Intel wireless cards blocked */ - if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) { + if (tp_features.quirks && tp_features.quirks->btusb_bug && + pci_dev_present(fwbug_cards_ids)) { vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL, FW_BUG "disable bluetooth subdriver for Intel cards\n"); return 1; @@ -11496,6 +11513,7 @@ static void thinkpad_acpi_module_exit(void) static int __init thinkpad_acpi_module_init(void) { + const struct dmi_system_id *dmi_id; int ret, i; tpacpi_lifecycle = TPACPI_LIFE_INIT; @@ -11535,6 +11553,10 @@ static int __init thinkpad_acpi_module_init(void) return -ENODEV; } + dmi_id = dmi_first_match(fwbug_list); + if (dmi_id) + tp_features.quirks = dmi_id->driver_data; + /* Device initialization */ tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1, NULL, 0); From 455cd867b85b53fd3602345f9b8a8facc551adc9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 28 Apr 2022 22:05:01 -0500 Subject: [PATCH 246/491] platform/x86: thinkpad_acpi: Add a s2idle resume quirk for a number of laptops Lenovo laptops that contain NVME SSDs across a variety of generations have trouble resuming from suspend to idle when the IOMMU translation layer is active for the NVME storage device. This generally manifests as a large resume delay or page faults. These delays and page faults occur as a result of a Lenovo BIOS specific SMI that runs during the D3->D0 transition on NVME devices. This SMI occurs because of a flag that is set during resume by Lenovo firmware: ``` OperationRegion (PM80, SystemMemory, 0xFED80380, 0x10) Field (PM80, AnyAcc, NoLock, Preserve) { SI3R, 1 } Method (_ON, 0, NotSerialized) // _ON_: Power On { TPST (0x60D0) If ((DAS3 == 0x00)) { If (SI3R) { TPST (0x60E0) M020 (NBRI, 0x00, 0x00, 0x04, (NCMD | 0x06)) M020 (NBRI, 0x00, 0x00, 0x10, NBAR) APMC = HDSI /* \HDSI */ SLPS = 0x01 SI3R = 0x00 TPST (0x60E1) } D0NV = 0x01 } } ``` Create a quirk that will run early in the resume process to prevent this SMI from running. As any of these machines are fixed, they can be peeled back from this quirk or narrowed down to individual firmware versions. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1910 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1689 Signed-off-by: Mario Limonciello Tested-by: Mark Pearson Link: https://lore.kernel.org/r/20220429030501.1909-3-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 126 +++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 2820205c01fd1..8180d7789f566 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -312,12 +312,17 @@ struct ibm_init_struct { /* DMI Quirks */ struct quirk_entry { bool btusb_bug; + u32 s2idle_bug_mmio; }; static struct quirk_entry quirk_btusb_bug = { .btusb_bug = true, }; +static struct quirk_entry quirk_s2idle_bug = { + .s2idle_bug_mmio = 0xfed80380, +}; + static struct { u32 bluetooth:1; u32 hotkey:1; @@ -4418,9 +4423,119 @@ static const struct dmi_system_id fwbug_list[] __initconst = { DMI_MATCH(DMI_BOARD_NAME, "20MV"), }, }, + { + .ident = "L14 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20X5"), + } + }, + { + .ident = "T14s Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XF"), + } + }, + { + .ident = "X13 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XH"), + } + }, + { + .ident = "T14 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XK"), + } + }, + { + .ident = "T14 Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UD"), + } + }, + { + .ident = "T14 Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UE"), + } + }, + { + .ident = "T14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), + } + }, + { + .ident = "P14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"), + } + }, + { + .ident = "P14s Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21A0"), + } + }, {} }; +#ifdef CONFIG_SUSPEND +/* + * Lenovo laptops from a variety of generations run a SMI handler during the D3->D0 + * transition that occurs specifically when exiting suspend to idle which can cause + * large delays during resume when the IOMMU translation layer is enabled (the default + * behavior) for NVME devices: + * + * To avoid this firmware problem, skip the SMI handler on these machines before the + * D0 transition occurs. + */ +static void thinkpad_acpi_amd_s2idle_restore(void) +{ + struct resource *res; + void __iomem *addr; + u8 val; + + res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1, + "thinkpad_acpi_pm80"); + if (!res) + return; + + addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1); + if (!addr) + goto cleanup_resource; + + val = ioread8(addr); + iowrite8(val & ~BIT(0), addr); + + iounmap(addr); +cleanup_resource: + release_resource(res); +} + +static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = { + .restore = thinkpad_acpi_amd_s2idle_restore, +}; +#endif + static const struct pci_device_id fwbug_cards_ids[] __initconst = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) }, @@ -11472,6 +11587,10 @@ static void thinkpad_acpi_module_exit(void) tpacpi_lifecycle = TPACPI_LIFE_EXITING; +#ifdef CONFIG_SUSPEND + if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) + acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops); +#endif if (tpacpi_hwmon) hwmon_device_unregister(tpacpi_hwmon); if (tp_features.sensors_pdrv_registered) @@ -11645,6 +11764,13 @@ static int __init thinkpad_acpi_module_init(void) tp_features.input_device_registered = 1; } +#ifdef CONFIG_SUSPEND + if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) { + if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops)) + pr_info("Using s2idle quirk to avoid %s platform firmware bug\n", + (dmi_id && dmi_id->ident) ? dmi_id->ident : ""); + } +#endif return 0; } From aa2fef6f40e6ccc22e932b36898f260f0e5a021a Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 2 May 2022 15:12:00 -0400 Subject: [PATCH 247/491] platform/x86: thinkpad_acpi: Correct dual fan probe There was an issue with the dual fan probe whereby the probe was failing as it assuming that second_fan support was not available. Corrected the logic so the probe works correctly. Cleaned up so quirks only used if 2nd fan not detected. Tested on X1 Carbon 10 (2 fans), X1 Carbon 9 (2 fans) and T490 (1 fan) Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220502191200.63470-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 8180d7789f566..e6cb4a14cdd47 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8880,24 +8880,27 @@ static int __init fan_init(struct ibm_init_struct *iibm) fan_status_access_mode = TPACPI_FAN_RD_TPEC; if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); - if (quirks & TPACPI_FAN_2FAN) { - tp_features.second_fan = 1; - pr_info("secondary fan support enabled\n"); - } - if (quirks & TPACPI_FAN_2CTL) { - tp_features.second_fan = 1; - tp_features.second_fan_ctl = 1; - pr_info("secondary fan control enabled\n"); - } /* Try and probe the 2nd fan */ + tp_features.second_fan = 1; /* needed for get_speed to work */ res = fan2_get_speed(&speed); if (res >= 0) { /* It responded - so let's assume it's there */ tp_features.second_fan = 1; tp_features.second_fan_ctl = 1; pr_info("secondary fan control detected & enabled\n"); + } else { + /* Fan not auto-detected */ + tp_features.second_fan = 0; + if (quirks & TPACPI_FAN_2FAN) { + tp_features.second_fan = 1; + pr_info("secondary fan support enabled\n"); + } + if (quirks & TPACPI_FAN_2CTL) { + tp_features.second_fan = 1; + tp_features.second_fan_ctl = 1; + pr_info("secondary fan control enabled\n"); + } } - } else { pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n"); return -ENODEV; From 2cdfa0c20d58da3757054797c2974c967035926a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 29 Apr 2022 08:23:22 -0400 Subject: [PATCH 248/491] platform/x86/intel: Fix 'rmmod pmt_telemetry' panic 'rmmod pmt_telemetry' panics with: BUG: kernel NULL pointer dereference, address: 0000000000000040 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 1697 Comm: rmmod Tainted: G S W -------- --- 5.18.0-rc4 #3 Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.3056.B00.2201310233 01/31/2022 RIP: 0010:device_del+0x1b/0x3d0 Code: e8 1a d9 e9 ff e9 58 ff ff ff 48 8b 08 eb dc 0f 1f 44 00 00 41 56 41 55 41 54 55 48 8d af 80 00 00 00 53 48 89 fb 48 83 ec 18 <4c> 8b 67 40 48 89 ef 65 48 8b 04 25 28 00 00 00 48 89 44 24 10 31 RSP: 0018:ffffb520415cfd60 EFLAGS: 00010286 RAX: 0000000000000070 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000080 R08: ffffffffffffffff R09: ffffb520415cfd78 R10: 0000000000000002 R11: ffffb520415cfd78 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f7e198e5740(0000) GS:ffff905c9f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 000000010782a005 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: ? __xa_erase+0x53/0xb0 device_unregister+0x13/0x50 intel_pmt_dev_destroy+0x34/0x60 [pmt_class] pmt_telem_remove+0x40/0x50 [pmt_telemetry] auxiliary_bus_remove+0x18/0x30 device_release_driver_internal+0xc1/0x150 driver_detach+0x44/0x90 bus_remove_driver+0x74/0xd0 auxiliary_driver_unregister+0x12/0x20 pmt_telem_exit+0xc/0xe4a [pmt_telemetry] __x64_sys_delete_module+0x13a/0x250 ? syscall_trace_enter.isra.19+0x11e/0x1a0 do_syscall_64+0x58/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? exc_page_fault+0x64/0x140 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f7e1803a05b Code: 73 01 c3 48 8b 0d 2d 4e 38 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fd 4d 38 00 f7 d8 64 89 01 48 The probe function, pmt_telem_probe(), adds an entry for devices even if they have not been initialized. This results in the array of initialized devices containing both initialized and uninitialized entries. This causes a panic in the remove function, pmt_telem_remove() which expects the array to only contain initialized entries. Only use an entry when a device is initialized. Cc: "David E. Box" Cc: Hans de Goede Cc: Mark Gross Cc: platform-driver-x86@vger.kernel.org Signed-off-by: David Arcari Signed-off-by: Prarit Bhargava Reviewed-by: David E. Box Link: https://lore.kernel.org/r/20220429122322.2550003-1-prarit@redhat.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmt/telemetry.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c index 6b6f3e2a617af..f73ecfd4a3092 100644 --- a/drivers/platform/x86/intel/pmt/telemetry.c +++ b/drivers/platform/x86/intel/pmt/telemetry.c @@ -103,7 +103,7 @@ static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxilia auxiliary_set_drvdata(auxdev, priv); for (i = 0; i < intel_vsec_dev->num_resources; i++) { - struct intel_pmt_entry *entry = &priv->entry[i]; + struct intel_pmt_entry *entry = &priv->entry[priv->num_entries]; ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i); if (ret < 0) From ed13d4ac57474d959c40fd05d8860e2b1607becb Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 29 Apr 2022 20:00:49 +0200 Subject: [PATCH 249/491] platform/surface: gpe: Add support for Surface Pro 8 The new Surface Pro 8 uses GPEs for lid events as well. Add an entry for that so that the lid can be used to wake the device. Note that this is a device with a keyboard type-cover, where this acts as the "lid". Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220429180049.1282447-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_gpe.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c index c1775db29efb6..ec66fde28e75a 100644 --- a/drivers/platform/surface/surface_gpe.c +++ b/drivers/platform/surface/surface_gpe.c @@ -99,6 +99,14 @@ static const struct dmi_system_id dmi_lid_device_table[] = { }, .driver_data = (void *)lid_device_props_l4D, }, + { + .ident = "Surface Pro 8", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 8"), + }, + .driver_data = (void *)lid_device_props_l4B, + }, { .ident = "Surface Book 1", .matches = { From 44acfc22c7d055d9c4f8f0974ee28422405b971a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 29 Apr 2022 21:57:38 +0200 Subject: [PATCH 250/491] platform/surface: aggregator: Fix initialization order when compiling as builtin module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building the Surface Aggregator Module (SAM) core, registry, and other SAM client drivers as builtin modules (=y), proper initialization order is not guaranteed. Due to this, client driver registration (triggered by device registration in the registry) races against bus initialization in the core. If any attempt is made at registering the device driver before the bus has been initialized (i.e. if bus initialization fails this race) driver registration will fail with a message similar to: Driver surface_battery was unable to register with bus_type surface_aggregator because the bus was not initialized Switch from module_init() to subsys_initcall() to resolve this issue. Note that the serdev subsystem uses postcore_initcall() so we are still able to safely register the serdev device driver for the core. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Reported-by: Blaž Hrastnik Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220429195738.535751-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c index d384d36098c27..a62c5dfe42d64 100644 --- a/drivers/platform/surface/aggregator/core.c +++ b/drivers/platform/surface/aggregator/core.c @@ -817,7 +817,7 @@ static int __init ssam_core_init(void) err_bus: return status; } -module_init(ssam_core_init); +subsys_initcall(ssam_core_init); static void __exit ssam_core_exit(void) { From aa482ddca85a3485be0e7b83a0789dc4d987670b Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Fri, 8 Apr 2022 12:08:38 -0500 Subject: [PATCH 251/491] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems Active State Power Management (ASPM) feature is enabled since kernel 5.14. There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and RX640, that do not work with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as video/display output, Intel Alder Lake based systems will freeze after suspend/resume. The issue was originally reported on one system (Dell Precision 3660 with BIOS version 0.14.81), but was later confirmed to affect at least 4 pre-production Alder Lake based systems. Add an extra check to disable ASPM on Intel Alder Lake based systems with the problematic AMD Volcanic Islands GFX cards. Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885 Signed-off-by: Richard Gong Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bca..45f0188c42739 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,6 +81,10 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" +#if IS_ENABLED(CONFIG_X86) +#include +#endif + #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev) WREG32_PCIE(ixPCIE_LC_CNTL, data); } +static bool aspm_support_quirk_check(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); +#else + return true; +#endif +} + static void vi_program_aspm(struct amdgpu_device *adev) { u32 data, data1, orig; bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev)) + if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check()) return; if (adev->flags & AMD_IS_APU || From 9b9bd3f640640f94272a461b2dfe558f91b322c5 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Sat, 19 Mar 2022 16:34:24 -0400 Subject: [PATCH 252/491] drm/amd/display: undo clearing of z10 related function pointers [Why] Z10 and S0i3 have some shared path. Previous code clean up , incorrectly removed these pointers, which breaks s0i3 restore [How] Do not clear the function pointers based on Z10 disable. Reviewed-by: Nicholas Kazlauskas Acked-by: Pavle Kotarac Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index d7559e5a99ce8..e708f07fe75af 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } - if (dc->debug.disable_z10) { - /*hw not support z10 or sw disable it*/ - dc->hwss.z10_restore = NULL; - dc->hwss.z10_save_init = NULL; - } } From dba785798526a3282cc4d0f0ea751883715dbbb4 Mon Sep 17 00:00:00 2001 From: Puyou Lu Date: Fri, 6 May 2022 16:06:30 +0800 Subject: [PATCH 253/491] gpio: pca953x: fix irq_stat not updated when irq is disabled (irq_mask not set) When one port's input state get inverted (eg. from low to hight) after pca953x_irq_setup but before setting irq_mask (by some other driver such as "gpio-keys"), the next inversion of this port (eg. from hight to low) will not be triggered any more (because irq_stat is not updated at the first time). Issue should be fixed after this commit. Fixes: 89ea8bbe9c3e ("gpio: pca953x.c: add interrupt handling capability") Signed-off-by: Puyou Lu Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index d2fe76f3f34fd..8726921a11294 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -762,11 +762,11 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio); bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio); + bitmap_copy(chip->irq_stat, new_stat, gc->ngpio); + if (bitmap_empty(trigger, gc->ngpio)) return false; - bitmap_copy(chip->irq_stat, new_stat, gc->ngpio); - bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio); bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio); bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio); From 0c2c7c069285374fc8feacddc0498f8ab7627117 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Mon, 2 May 2022 09:58:07 -0700 Subject: [PATCH 254/491] KVM: SEV: Mark nested locking of vcpu->lock svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all source and target vcpu->locks. Unfortunately there is an 8 subclass limit, so a new subclass cannot be used for each vCPU. Instead maintain ownership of the first vcpu's mutex.dep_map using a role specific subclass: source vs target. Release the other vcpu's mutex.dep_maps. Fixes: b56639318bb2b ("KVM: SEV: Add support for SEV intra host migration") Reported-by: John Sperbeck Suggested-by: David Rientjes Suggested-by: Sean Christopherson Suggested-by: Paolo Bonzini Cc: Hillf Danton Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Gonda Message-Id: <20220502165807.529624-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0ad70c12c7c31..7c392873626fd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1594,24 +1594,51 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) atomic_set_release(&src_sev->migration_in_progress, 0); } +/* vCPU mutex subclasses. */ +enum sev_migration_role { + SEV_MIGRATION_SOURCE = 0, + SEV_MIGRATION_TARGET, + SEV_NR_MIGRATION_ROLES, +}; -static int sev_lock_vcpus_for_migration(struct kvm *kvm) +static int sev_lock_vcpus_for_migration(struct kvm *kvm, + enum sev_migration_role role) { struct kvm_vcpu *vcpu; unsigned long i, j; + bool first = true; kvm_for_each_vcpu(i, vcpu, kvm) { - if (mutex_lock_killable(&vcpu->mutex)) + if (mutex_lock_killable_nested(&vcpu->mutex, role)) goto out_unlock; + + if (first) { + /* + * Reset the role to one that avoids colliding with + * the role used for the first vcpu mutex. + */ + role = SEV_NR_MIGRATION_ROLES; + first = false; + } else { + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); + } } return 0; out_unlock: + + first = true; kvm_for_each_vcpu(j, vcpu, kvm) { if (i == j) break; + if (first) + first = false; + else + mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); + + mutex_unlock(&vcpu->mutex); } return -EINTR; @@ -1621,8 +1648,15 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + bool first = true; kvm_for_each_vcpu(i, vcpu, kvm) { + if (first) + first = false; + else + mutex_acquire(&vcpu->mutex.dep_map, + SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); + mutex_unlock(&vcpu->mutex); } } @@ -1748,10 +1782,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) charged = true; } - ret = sev_lock_vcpus_for_migration(kvm); + ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); if (ret) goto out_dst_cgroup; - ret = sev_lock_vcpus_for_migration(source_kvm); + ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); if (ret) goto out_dst_vcpu; From 053d2290c0307e3642e75e0185ddadf084dc36c1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 May 2022 22:18:50 +0000 Subject: [PATCH 255/491] KVM: VMX: Exit to userspace if vCPU has injected exception and invalid state Exit to userspace with an emulation error if KVM encounters an injected exception with invalid guest state, in addition to the existing check of bailing if there's a pending exception (KVM doesn't support emulating exceptions except when emulating real mode via vm86). In theory, KVM should never get to such a situation as KVM is supposed to exit to userspace before injecting an exception with invalid guest state. But in practice, userspace can intervene and manually inject an exception and/or stuff registers to force invalid guest state while a previously injected exception is awaiting reinjection. Fixes: fc4fad79fc3d ("KVM: VMX: Reject KVM_RUN if emulation is required with pending exception") Reported-by: syzbot+cfafed3bb76d3e37581b@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220502221850.131873-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d58b763df855f..610355b9ccceb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5472,7 +5472,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); return vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending; + (vcpu->arch.exception.pending || vcpu->arch.exception.injected); } static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) From 486b9eee57ddca5c9a2d59fc41153f36002e0a00 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 23 Apr 2022 12:20:21 +0200 Subject: [PATCH 256/491] ice: Fix race during aux device (un)plugging Function ice_plug_aux_dev() assigns pf->adev field too early prior aux device initialization and on other side ice_unplug_aux_dev() starts aux device deinit and at the end assigns NULL to pf->adev. This is wrong because pf->adev should always be non-NULL only when aux device is fully initialized and ready. This wrong order causes a crash when ice_send_event_to_aux() call occurs because that function depends on non-NULL value of pf->adev and does not assume that aux device is half-initialized or half-destroyed. After order correction the race window is tiny but it is still there, as Leon mentioned and manipulation with pf->adev needs to be protected by mutex. Fix (un-)plugging functions so pf->adev field is set after aux device init and prior aux device destroy and protect pf->adev assignment by new mutex. This mutex is also held during ice_send_event_to_aux() call to ensure that aux device is valid during that call. Note that device lock used ice_send_event_to_aux() needs to be kept to avoid race with aux drv unload. Reproducer: cycle=1 while :;do echo "#### Cycle: $cycle" ip link set ens7f0 mtu 9000 ip link add bond0 type bond mode 1 miimon 100 ip link set bond0 up ifenslave bond0 ens7f0 ip link set bond0 mtu 9000 ethtool -L ens7f0 combined 1 ip link del bond0 ip link set ens7f0 mtu 1500 sleep 1 let cycle++ done In short when the device is added/removed to/from bond the aux device is unplugged/plugged. When MTU of the device is changed an event is sent to aux device asynchronously. This can race with (un)plugging operation and because pf->adev is set too early (plug) or too late (unplug) the function ice_send_event_to_aux() can touch uninitialized or destroyed fields. In the case of crash below pf->adev->dev.mutex. Crash: [ 53.372066] bond0: (slave ens7f0): making interface the new active one [ 53.378622] bond0: (slave ens7f0): Enslaving as an active interface with an u p link [ 53.386294] IPv6: ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready [ 53.549104] bond0: (slave ens7f1): Enslaving as a backup interface with an up link [ 54.118906] ice 0000:ca:00.0 ens7f0: Number of in use tx queues changed inval idating tc mappings. Priority traffic classification disabled! [ 54.233374] ice 0000:ca:00.1 ens7f1: Number of in use tx queues changed inval idating tc mappings. Priority traffic classification disabled! [ 54.248204] bond0: (slave ens7f0): Releasing backup interface [ 54.253955] bond0: (slave ens7f1): making interface the new active one [ 54.274875] bond0: (slave ens7f1): Releasing backup interface [ 54.289153] bond0 (unregistering): Released all slaves [ 55.383179] MII link monitoring set to 100 ms [ 55.398696] bond0: (slave ens7f0): making interface the new active one [ 55.405241] BUG: kernel NULL pointer dereference, address: 0000000000000080 [ 55.405289] bond0: (slave ens7f0): Enslaving as an active interface with an u p link [ 55.412198] #PF: supervisor write access in kernel mode [ 55.412200] #PF: error_code(0x0002) - not-present page [ 55.412201] PGD 25d2ad067 P4D 0 [ 55.412204] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 55.412207] CPU: 0 PID: 403 Comm: kworker/0:2 Kdump: loaded Tainted: G S 5.17.0-13579-g57f2d6540f03 #1 [ 55.429094] bond0: (slave ens7f1): Enslaving as a backup interface with an up link [ 55.430224] Hardware name: Dell Inc. PowerEdge R750/06V45N, BIOS 1.4.4 10/07/ 2021 [ 55.430226] Workqueue: ice ice_service_task [ice] [ 55.468169] RIP: 0010:mutex_unlock+0x10/0x20 [ 55.472439] Code: 0f b1 13 74 96 eb e0 4c 89 ee eb d8 e8 79 54 ff ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 65 48 8b 04 25 40 ef 01 00 31 d2 48 0f b1 17 75 01 c3 e9 e3 fe ff ff 0f 1f 00 0f 1f 44 00 00 48 [ 55.491186] RSP: 0018:ff4454230d7d7e28 EFLAGS: 00010246 [ 55.496413] RAX: ff1a79b208b08000 RBX: ff1a79b2182e8880 RCX: 0000000000000001 [ 55.503545] RDX: 0000000000000000 RSI: ff4454230d7d7db0 RDI: 0000000000000080 [ 55.510678] RBP: ff1a79d1c7e48b68 R08: ff4454230d7d7db0 R09: 0000000000000041 [ 55.517812] R10: 00000000000000a5 R11: 00000000000006e6 R12: ff1a79d1c7e48bc0 [ 55.524945] R13: 0000000000000000 R14: ff1a79d0ffc305c0 R15: 0000000000000000 [ 55.532076] FS: 0000000000000000(0000) GS:ff1a79d0ffc00000(0000) knlGS:0000000000000000 [ 55.540163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.545908] CR2: 0000000000000080 CR3: 00000003487ae003 CR4: 0000000000771ef0 [ 55.553041] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 55.560173] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 55.567305] PKRU: 55555554 [ 55.570018] Call Trace: [ 55.572474] [ 55.574579] ice_service_task+0xaab/0xef0 [ice] [ 55.579130] process_one_work+0x1c5/0x390 [ 55.583141] ? process_one_work+0x390/0x390 [ 55.587326] worker_thread+0x30/0x360 [ 55.590994] ? process_one_work+0x390/0x390 [ 55.595180] kthread+0xe6/0x110 [ 55.598325] ? kthread_complete_and_exit+0x20/0x20 [ 55.603116] ret_from_fork+0x1f/0x30 [ 55.606698] Fixes: f9f5301e7e2d ("ice: Register auxiliary device to provide RDMA") Reviewed-by: Leon Romanovsky Signed-off-by: Ivan Vecera Reviewed-by: Dave Ertman Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_idc.c | 25 +++++++++++++++-------- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8ed3c9ab7ff72..a895e3a8e988c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -540,6 +540,7 @@ struct ice_pf { struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ struct mutex tc_mutex; /* lock to protect TC changes */ + struct mutex adev_mutex; /* lock to protect aux device access */ u32 msg_enable; struct ice_ptp ptp; struct tty_driver *ice_gnss_tty_driver; diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 25a436d342c29..3e3b2ed4cd5d9 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) if (WARN_ON_ONCE(!in_task())) return; + mutex_lock(&pf->adev_mutex); if (!pf->adev) - return; + goto finish; device_lock(&pf->adev->dev); iadrv = ice_get_auxiliary_drv(pf); if (iadrv && iadrv->event_handler) iadrv->event_handler(pf, event); device_unlock(&pf->adev->dev); +finish: + mutex_unlock(&pf->adev_mutex); } /** @@ -290,7 +293,6 @@ int ice_plug_aux_dev(struct ice_pf *pf) return -ENOMEM; adev = &iadev->adev; - pf->adev = adev; iadev->pf = pf; adev->id = pf->aux_idx; @@ -300,18 +302,20 @@ int ice_plug_aux_dev(struct ice_pf *pf) ret = auxiliary_device_init(adev); if (ret) { - pf->adev = NULL; kfree(iadev); return ret; } ret = auxiliary_device_add(adev); if (ret) { - pf->adev = NULL; auxiliary_device_uninit(adev); return ret; } + mutex_lock(&pf->adev_mutex); + pf->adev = adev; + mutex_unlock(&pf->adev_mutex); + return 0; } @@ -320,12 +324,17 @@ int ice_plug_aux_dev(struct ice_pf *pf) */ void ice_unplug_aux_dev(struct ice_pf *pf) { - if (!pf->adev) - return; + struct auxiliary_device *adev; - auxiliary_device_delete(pf->adev); - auxiliary_device_uninit(pf->adev); + mutex_lock(&pf->adev_mutex); + adev = pf->adev; pf->adev = NULL; + mutex_unlock(&pf->adev_mutex); + + if (adev) { + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); + } } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9a0a358a15c25..949669fed7d60 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3769,6 +3769,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) static void ice_deinit_pf(struct ice_pf *pf) { ice_service_task_stop(pf); + mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); @@ -3847,6 +3848,7 @@ static int ice_init_pf(struct ice_pf *pf) mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); + mutex_init(&pf->adev_mutex); INIT_HLIST_HEAD(&pf->aq_wait_list); spin_lock_init(&pf->aq_wait_lock); From 6096dae926a22e2892ef9169f582589c16d39639 Mon Sep 17 00:00:00 2001 From: Anatolii Gerasymenko Date: Thu, 28 Apr 2022 12:01:00 +0000 Subject: [PATCH 257/491] ice: clear stale Tx queue settings before configuring The iAVF driver uses 3 virtchnl op codes to communicate with the PF regarding the VF Tx queues: * VIRTCHNL_OP_CONFIG_VSI_QUEUES configures the hardware and firmware logic for the Tx queues * VIRTCHNL_OP_ENABLE_QUEUES configures the queue interrupts * VIRTCHNL_OP_DISABLE_QUEUES disables the queue interrupts and Tx rings. There is a bug in the iAVF driver due to the race condition between VF reset request and shutdown being executed in parallel. This leads to a break in logic and VIRTCHNL_OP_DISABLE_QUEUES is not being sent. If this occurs, the PF driver never cleans up the Tx queues. This results in leaving behind stale Tx queue settings in the hardware and firmware. The most obvious outcome is that upon the next VIRTCHNL_OP_CONFIG_VSI_QUEUES, the PF will fail to program the Tx scheduler node due to a lack of space. We need to protect ICE driver against such situation. To fix this, make sure we clear existing stale settings out when handling VIRTCHNL_OP_CONFIG_VSI_QUEUES. This ensures we remove the previous settings. Calling ice_vf_vsi_dis_single_txq should be safe as it will do nothing if the queue is not configured. The function already handles the case when the Tx queue is not currently configured and exits with a 0 return in that case. Fixes: 7ad15440acf8 ("ice: Refactor VIRTCHNL_OP_CONFIG_VSI_QUEUES handling") Signed-off-by: Jacob Keller Signed-off-by: Anatolii Gerasymenko Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 68 ++++++++++++++----- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index b72606c9e6d03..2889e050a4c93 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1307,13 +1307,52 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) NULL, 0); } +/** + * ice_vf_vsi_dis_single_txq - disable a single Tx queue + * @vf: VF to disable queue for + * @vsi: VSI for the VF + * @q_id: VF relative (0-based) queue ID + * + * Attempt to disable the Tx queue passed in. If the Tx queue was successfully + * disabled then clear q_id bit in the enabled queues bitmap and return + * success. Otherwise return error. + */ +static int +ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) +{ + struct ice_txq_meta txq_meta = { 0 }; + struct ice_tx_ring *ring; + int err; + + if (!test_bit(q_id, vf->txq_ena)) + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + q_id, vsi->vsi_num); + + ring = vsi->tx_rings[q_id]; + if (!ring) + return -EINVAL; + + ice_fill_txq_meta(vsi, ring, &txq_meta); + + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", + q_id, vsi->vsi_num); + return err; + } + + /* Clear enabled queues flag */ + clear_bit(q_id, vf->txq_ena); + + return 0; +} + /** * ice_vc_dis_qs_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer * - * called from the VF to disable all or specific - * queue(s) + * called from the VF to disable all or specific queue(s) */ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) { @@ -1350,30 +1389,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id]; - struct ice_txq_meta txq_meta = { 0 }; - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (!test_bit(vf_q_id, vf->txq_ena)) - dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", - vf_q_id, vsi->vsi_num); - - ice_fill_txq_meta(vsi, ring, &txq_meta); - - if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, - ring, &txq_meta)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); + if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - - /* Clear enabled queues flag */ - clear_bit(vf_q_id, vf->txq_ena); } } @@ -1622,6 +1646,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qpi->txq.ring_len > 0) { vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; vsi->tx_rings[i]->count = qpi->txq.ring_len; + + /* Disable any existing queue first */ + if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Configure a queue with the requested settings */ if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; From a11b6c1a383ff092f432e040c20e032503785d47 Mon Sep 17 00:00:00 2001 From: Michal Michalik Date: Wed, 20 Apr 2022 14:23:02 +0200 Subject: [PATCH 258/491] ice: fix PTP stale Tx timestamps cleanup Read stale PTP Tx timestamps from PHY on cleanup. After running out of Tx timestamps request handlers, hardware (HW) stops reporting finished requests. Function ice_ptp_tx_tstamp_cleanup() used to only clean up stale handlers in driver and was leaving the hardware registers not read. Not reading stale PTP Tx timestamps prevents next interrupts from arriving and makes timestamping unusable. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Michal Michalik Reviewed-by: Jacob Keller Reviewed-by: Paul Menzel Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index a1cd33273ca49..da025c204577f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2287,6 +2287,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) /** * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped + * @hw: pointer to the hw struct * @tx: PTP Tx tracker to clean up * * Loop through the Tx timestamp requests and see if any of them have been @@ -2295,7 +2296,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) * timestamp will never be captured. This might happen if the packet gets * discarded before it reaches the PHY timestamping block. */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) +static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx) { u8 idx; @@ -2304,11 +2305,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) for_each_set_bit(idx, tx->in_use, tx->len) { struct sk_buff *skb; + u64 raw_tstamp; /* Check if this SKB has been waiting for too long */ if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) continue; + /* Read tstamp to be able to use this register again */ + ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, + &raw_tstamp); + spin_lock(&tx->lock); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; @@ -2330,7 +2336,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx); + ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); /* Run twice a second */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, From 9e6c6d17d1d6a3f1515ce399f9a011629ec79aa0 Mon Sep 17 00:00:00 2001 From: Lokesh Dhoundiyal Date: Thu, 5 May 2022 14:00:17 +1200 Subject: [PATCH 259/491] ipv4: drop dst in multicast routing path kmemleak reports the following when routing multicast traffic over an ipsec tunnel. Kmemleak output: unreferenced object 0x8000000044bebb00 (size 256): comm "softirq", pid 0, jiffies 4294985356 (age 126.810s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 80 00 00 00 05 13 74 80 ..............t. 80 00 00 00 04 9b bf f9 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f83947e0>] __kmalloc+0x1e8/0x300 [<00000000b7ed8dca>] metadata_dst_alloc+0x24/0x58 [<0000000081d32c20>] __ipgre_rcv+0x100/0x2b8 [<00000000824f6cf1>] gre_rcv+0x178/0x540 [<00000000ccd4e162>] gre_rcv+0x7c/0xd8 [<00000000c024b148>] ip_protocol_deliver_rcu+0x124/0x350 [<000000006a483377>] ip_local_deliver_finish+0x54/0x68 [<00000000d9271b3a>] ip_local_deliver+0x128/0x168 [<00000000bd4968ae>] xfrm_trans_reinject+0xb8/0xf8 [<0000000071672a19>] tasklet_action_common.isra.16+0xc4/0x1b0 [<0000000062e9c336>] __do_softirq+0x1fc/0x3e0 [<00000000013d7914>] irq_exit+0xc4/0xe0 [<00000000a4d73e90>] plat_irq_dispatch+0x7c/0x108 [<000000000751eb8e>] handle_int+0x16c/0x178 [<000000001668023b>] _raw_spin_unlock_irqrestore+0x1c/0x28 The metadata dst is leaked when ip_route_input_mc() updates the dst for the skb. Commit f38a9eb1f77b ("dst: Metadata destinations") correctly handled dropping the dst in ip_route_input_slow() but missed the multicast case which is handled by ip_route_input_mc(). Drop the dst in ip_route_input_mc() avoiding the leak. Fixes: f38a9eb1f77b ("dst: Metadata destinations") Signed-off-by: Lokesh Dhoundiyal Signed-off-by: Chris Packham Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220505020017.3111846-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98c6f34295931..57abd27e842cf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1753,6 +1753,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); + skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return 0; } From 87fd2b091fb33871a7f812658a0971e8e26f903f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2022 15:21:34 +0100 Subject: [PATCH 260/491] drm/nouveau/tegra: Stop using iommu_present() Even if some IOMMU has registered itself on the platform "bus", that doesn't necessarily mean it provides translation for the device we care about. Replace iommu_present() with a more appropriate check. Signed-off-by: Robin Murphy Reviewed-by: Lyude Paul [added cc for stable] Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org # v5.0+ Link: https://patchwork.freedesktop.org/patch/msgid/70d40ea441da3663c2824d54102b471e9a621f8a.1649168494.git.robin.murphy@arm.com --- drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 992cc285f2fec..2ed528c065fae 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) mutex_init(&tdev->iommu.mutex); - if (iommu_present(&platform_bus_type)) { + if (device_iommu_mapped(dev)) { tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); if (!tdev->iommu.domain) goto error; From ab244be47a8f111bc82496a8a20c907236e37f95 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 9 Feb 2022 07:03:11 +0100 Subject: [PATCH 261/491] drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name() If successful ida_simple_get() calls are not undone when needed, some additional memory may be allocated and wasted. Here, an ID between 0 and MAX_INT is required. If this ID is >=100, it is not taken into account and is wasted. It should be released. Instead of calling ida_simple_remove(), take advantage of the 'max' parameter to require the ID not to be too big. Should it be too big, it is not allocated and don't need to be freed. While at it, use ida_alloc_xxx()/ida_free() instead to ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose. Fixes: db1a0ae21461 ("drm/nouveau/bl: Assign different names to interfaces") Signed-off-by: Christophe JAILLET Reviewed-by: Lyude Paul [Fixed formatting warning from checkpatch] Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/9ba85bca59df6813dc029e743a836451d5173221.1644386541.git.christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index daf9f87477ba1..a2141d3d9b1d2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -46,8 +46,9 @@ static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], struct nouveau_backlight *bl) { - const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL); - if (nb < 0 || nb >= 100) + const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL); + + if (nb < 0) return false; if (nb > 0) snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); @@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector) nv_encoder, ops, &props); if (IS_ERR(bl->dev)) { if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); ret = PTR_ERR(bl->dev); goto fail_alloc; } @@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector) return; if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); backlight_device_unregister(bl->dev); nv_conn->backlight = NULL; From d5076fe4049cadef1f040eda4aaa001bb5424225 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 May 2022 09:19:46 -0700 Subject: [PATCH 262/491] netlink: do not reset transport header in netlink_recvmsg() netlink_recvmsg() does not need to change transport header. If transport header was needed, it should have been reset by the producer (netlink_dump()), not the consumer(s). The following trace probably happened when multiple threads were using MSG_PEEK. BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg write to 0xffff88811e9f15b2 of 2 bytes by task 32012 on cpu 1: skb_reset_transport_header include/linux/skbuff.h:2760 [inline] netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] __sys_recvfrom+0x204/0x2c0 net/socket.c:2097 __do_sys_recvfrom net/socket.c:2115 [inline] __se_sys_recvfrom net/socket.c:2111 [inline] __x64_sys_recvfrom+0x74/0x90 net/socket.c:2111 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff88811e9f15b2 of 2 bytes by task 32005 on cpu 0: skb_reset_transport_header include/linux/skbuff.h:2760 [inline] netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 ____sys_recvmsg+0x162/0x2f0 ___sys_recvmsg net/socket.c:2674 [inline] __sys_recvmsg+0x209/0x3f0 net/socket.c:2704 __do_sys_recvmsg net/socket.c:2714 [inline] __se_sys_recvmsg net/socket.c:2711 [inline] __x64_sys_recvmsg+0x42/0x50 net/socket.c:2711 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0xffff -> 0x0000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 32005 Comm: syz-executor.4 Not tainted 5.18.0-rc1-syzkaller-00328-ge1f700ebd6be-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220505161946.2867638-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 05a3795eac8e9..73e9c0a9c1876 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = len; } - skb_reset_transport_header(data_skb); err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { From 1c7ab9cd98b78bef1657a5db7204d8d437e24c94 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 May 2022 16:31:01 -0700 Subject: [PATCH 263/491] net: chelsio: cxgb4: Avoid potential negative array offset Using min_t(int, ...) as a potential array index implies to the compiler that negative offsets should be allowed. This is not the case, though. Replace "int" with "unsigned int". Fixes the following warning exposed under future CONFIG_FORTIFY_SOURCE improvements: In file included from include/linux/string.h:253, from include/linux/bitmap.h:11, from include/linux/cpumask.h:12, from include/linux/smp.h:13, from include/linux/lockdep.h:14, from include/linux/rcupdate.h:29, from include/linux/rculist.h:11, from include/linux/pid.h:5, from include/linux/sched.h:14, from include/linux/delay.h:23, from drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:35: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function 't4_get_raw_vpd_params': include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 29 and size [2147483648, 4294967295] [-Warray-bounds] 46 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' 388 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2796:9: note: in expansion of macro 'memcpy' 2796 | memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); | ^~~~~~ include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 0 and size [2147483648, 4294967295] [-Warray-bounds] 46 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' 388 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2798:9: note: in expansion of macro 'memcpy' 2798 | memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); | ^~~~~~ Additionally remove needless cast from u8[] to char * in last strim() call. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202205031926.FVP7epJM-lkp@intel.com Fixes: fc9279298e3a ("cxgb4: Search VPD with pci_vpd_find_ro_info_keyword()") Fixes: 24c521f81c30 ("cxgb4: Use pci_vpd_find_id_string() to find VPD ID string") Cc: Raju Rangoju Cc: Eric Dumazet Cc: Paolo Abeni Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220505233101.1224230-1-keescook@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e7b4e3ed056c7..8d719f82854a9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) goto out; na = ret; - memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); + memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN)); strim(p->id); - memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); + memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN)); strim(p->sn); - memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN)); + memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN)); strim(p->pn); - memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN)); - strim((char *)p->na); + memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN)); + strim(p->na); out: vfree(vpd); From 1b5853dfab7fdde450f00f145327342238135c8a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 15:22:25 +0200 Subject: [PATCH 264/491] fbdev: efifb: Fix a use-after-free due early fb_info cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove") attempted to fix a use-after-free error due driver freeing the fb_info in the .remove handler instead of doing it in .fb_destroy. But ironically that change introduced yet another use-after-free since the fb_info was still used after the free. This should fix for good by freeing the fb_info at the end of the handler. Fixes: d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove") Reported-by: Ville Syrjälä Reported-by: Andrzej Hajda Signed-off-by: Javier Martinez Canillas Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Reviewed-by: Thomas Zimmermann Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220506132225.588379-1-javierm@redhat.com --- drivers/video/fbdev/efifb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index cfa3dc0b4eeef..b3d5f884c5445 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -259,12 +259,12 @@ static void efifb_destroy(struct fb_info *info) memunmap(info->screen_base); } - framebuffer_release(info); - if (request_mem_succeeded) release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); fb_dealloc_cmap(&info->cmap); + + framebuffer_release(info); } static const struct fb_ops efifb_ops = { From 3d1b0d351441c2be7de082b92f43d0bfdc95a8f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 May 2022 13:48:21 -0400 Subject: [PATCH 265/491] Revert "SUNRPC: Ensure gss-proxy connects on setup" This reverts commit 892de36fd4a98fab3298d417c051d9099af5448d. The gssproxy server is unresponsive when it calls into the kernel to start the upcall service, so it will not reply to our RPC ping at all. Reported-by: "J.Bruce Fields" Fixes: 892de36fd4a9 ("SUNRPC: Ensure gss-proxy connects on setup") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- net/sunrpc/clnt.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index db5149567305e..267b7aeaf1a69 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,7 +160,6 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) -#define RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 8ca1d809b78d9..61c276bddaf25 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -97,7 +97,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * timeout, which would result in reconnections being * done without the correct namespace: */ - .flags = RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL | + .flags = RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 22c28cf43ebae..98133aa54f198 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -479,9 +479,6 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { int err = rpc_ping(clnt); - if ((args->flags & RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL) && - err == -EOPNOTSUPP) - err = 0; if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); From fd13359f54ee854f00134abc6be32da94ec53dbf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 May 2022 13:53:59 -0400 Subject: [PATCH 266/491] SUNRPC: Ensure that the gssproxy client can start in a connected state Ensure that the gssproxy client connects to the server from the gssproxy daemon process context so that the AF_LOCAL socket connection is done using the correct path and namespaces. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 + net/sunrpc/clnt.c | 33 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 267b7aeaf1a69..90501404fa49f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf25..f549e4c05defc 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * done without the correct namespace: */ .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_CONNECTED | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 98133aa54f198..e2c6eca0271b3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task, static int rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static int rpc_ping_noreply(struct rpc_clnt *clnt); static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) @@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, rpc_shutdown_client(clnt); return ERR_PTR(err); } + } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) { + int err = rpc_ping_noreply(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } } clnt->cl_softrtry = 1; @@ -2709,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; +static const struct rpc_procinfo rpcproc_null_noreply = { + .p_encode = rpcproc_encode_null, +}; + static void rpc_null_call_prepare(struct rpc_task *task, void *data) { @@ -2762,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt) return status; } +static int rpc_ping_noreply(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null_noreply, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_null_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + struct rpc_cb_add_xprt_calldata { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; From d7be213849232a2accb219d537edf056d29186b4 Mon Sep 17 00:00:00 2001 From: Forest Crossman Date: Tue, 3 May 2022 19:24:44 -0500 Subject: [PATCH 267/491] ALSA: usb-audio: Don't get sample rate for MCT Trigger 5 USB-to-HDMI This device doesn't support reading the sample rate, so we need to apply this quirk to avoid a 15-second delay waiting for three timeouts. Signed-off-by: Forest Crossman Link: https://lore.kernel.org/r/20220504002444.114011-2-cyrozap@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ab9f3da49941f..fbbe59054c3fb 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1822,6 +1822,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */ QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ From c3d9ca93f1e3bd3d1adfc4479a12c82fed424c87 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Apr 2022 12:33:18 -0700 Subject: [PATCH 268/491] ALSA: hda - fix unused Realtek function when PM is not enabled When CONFIG_PM is not enabled, alc_shutup() is not needed, so move it inside the #ifdef CONFIG_PM guard. Also drop some contiguous #endif / #ifdef CONFIG_PM for simplicity. Fixes this build warning: sound/pci/hda/patch_realtek.c:886:20: warning: unused function 'alc_shutup' Fixes: 08c189f2c552 ("ALSA: hda - Use generic parser codes for Realtek driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220430193318.29024-1-rdunlap@infradead.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cf531c1efa132..89b705d38f5b0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -937,6 +937,9 @@ static int alc_init(struct hda_codec *codec) return 0; } +#define alc_free snd_hda_gen_free + +#ifdef CONFIG_PM static inline void alc_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -950,9 +953,6 @@ static inline void alc_shutup(struct hda_codec *codec) alc_shutup_pins(codec); } -#define alc_free snd_hda_gen_free - -#ifdef CONFIG_PM static void alc_power_eapd(struct hda_codec *codec) { alc_auto_setup_eapd(codec, false); @@ -966,9 +966,7 @@ static int alc_suspend(struct hda_codec *codec) spec->power_hook(codec); return 0; } -#endif -#ifdef CONFIG_PM static int alc_resume(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; From 1efcdd9c1f34f5a6590bc9ac5471e562fb011386 Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Sun, 1 May 2022 14:42:37 +0200 Subject: [PATCH 269/491] ALSA: hda/realtek: Add quirk for Dell Latitude 7520 The driver is currently using ALC269_FIXUP_DELL4_MIC_NO_PRESENCE for the Latitude 7520, but this fixup chain has some issues: - The internal mic is really loud and the recorded audio is distorted at "standard" audio levels. - There are pop noises at system startup and when plugging/unplugging headphone jacks. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215885 Signed-off-by: Gabriele Mazzotta Link: https://lore.kernel.org/r/20220501124237.4667-1-gabriele.mzt@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 43 +++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 89b705d38f5b0..37ad7b6780a23 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6778,6 +6778,41 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, } } +static void alc_fixup_dell4_mic_no_presence_quiet(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + struct hda_input_mux *imux = &spec->gen.input_mux; + int i; + + alc269_fixup_limit_int_mic_boost(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /** + * Set the vref of pin 0x19 (Headset Mic) and pin 0x1b (Headphone Mic) + * to Hi-Z to avoid pop noises at startup and when plugging and + * unplugging headphones. + */ + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); + snd_hda_codec_set_pin_target(codec, 0x1b, PIN_VREFHIZ); + break; + case HDA_FIXUP_ACT_PROBE: + /** + * Make the internal mic (0x12) the default input source to + * prevent pop noises on cold boot. + */ + for (i = 0; i < imux->num_items; i++) { + if (spec->gen.imux_pins[i] == 0x12) { + spec->gen.cur_mux[0] = i; + break; + } + } + break; + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6819,6 +6854,7 @@ enum { ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC269_FIXUP_ASPIRE_HEADSET_MIC, @@ -8806,6 +8842,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_HP_MUTE_LED, }, + [ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_dell4_mic_no_presence_quiet, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8896,6 +8938,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a38, "Dell Latitude 7520", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET), SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), From f71f01394f742fc4558b3f9f4c7ef4c4cf3b07c8 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:07 +0200 Subject: [PATCH 270/491] floppy: use a statically allocated error counter Interrupt handler bad_flp_intr() may cause a UAF on the recently freed request just to increment the error count. There's no point keeping that one in the request anyway, and since the interrupt handler uses a static pointer to the error which cannot be kept in sync with the pending request, better make it use a static error counter that's reset for each new request. This reset now happens when entering redo_fd_request() for a new request via set_next_request(). One initial concern about a single error counter was that errors on one floppy drive could be reported on another one, but this problem is not real given that the driver uses a single drive at a time, as that PC-compatible controllers also have this limitation by using shared signals. As such the error count is always for the "current" drive. Reported-by: Minh Yuan Suggested-by: Linus Torvalds Tested-by: Denis Efremov Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- drivers/block/floppy.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d5b9ff9bcbb2b..015841f50f4e9 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -509,8 +509,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -530,7 +530,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1455,7 +1454,7 @@ static int interpret_errors(void) if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= drive_params[current_drive].max_errors.reporting) { + } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) @@ -2095,7 +2094,7 @@ static void bad_flp_intr(void) if (!next_valid_format(current_drive)) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(write_errors[current_drive].badness, err_count); if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); @@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2759,10 +2757,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } /* Starts or continues processing request. Will automatically unlock the @@ -2821,7 +2820,6 @@ static void redo_fd_request(void) _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); From f3b10a3c22c6a5f1d623b70eca2b4d1efafccd71 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:08 +0200 Subject: [PATCH 271/491] ataflop: use a statically allocated error counters This is the last driver making use of fd_request->error_count, which is easy to get wrong as was shown in floppy.c. We don't need to keep it there, it can be moved to the atari_floppy_struct instead, so let's do this. Suggested-by: Linus Torvalds Cc: Minh Yuan Cc: Geert Uytterhoeven Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- drivers/block/ataflop.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 5d819a466e2f3..e232cc4fd444b 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -303,6 +303,7 @@ static struct atari_floppy_struct { int ref; int type; struct blk_mq_tag_set tag_set; + int error_count; } unit[FD_MAX_UNITS]; #define UD unit[drive] @@ -705,14 +706,14 @@ static void fd_error( void ) if (!fd_request) return; - fd_request->error_count++; - if (fd_request->error_count >= MAX_ERRORS) { + unit[SelectedDrive].error_count++; + if (unit[SelectedDrive].error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(BLK_STS_IOERR); finish_fdc(); return; } - else if (fd_request->error_count == RECALIBRATE_ERRORS) { + else if (unit[SelectedDrive].error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; @@ -1491,7 +1492,7 @@ static void setup_req_params( int drive ) ReqData = ReqBuffer + 512 * ReqCnt; if (UseTrackbuffer) - read_track = (ReqCmd == READ && fd_request->error_count == 0); + read_track = (ReqCmd == READ && unit[drive].error_count == 0); else read_track = 0; @@ -1520,6 +1521,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_RESOURCE; } fd_request = bd->rq; + unit[drive].error_count = 0; blk_mq_start_request(fd_request); atari_disable_irq( IRQ_MFP_FDC ); From 2e3afb42dd480d755cd7d97ca04586a5616c1a5e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:09 +0200 Subject: [PATCH 272/491] blk-mq: remove the error_count from struct request The last two users were floppy.c and ataflop.c respectively, it was verified that no other drivers makes use of this, so let's remove it. Suggested-by: Linus Torvalds Cc: Minh Yuan Cc: Denis Efremov , Cc: Geert Uytterhoeven Cc: Christoph Hellwig Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7aa5c54901a93..9f07061418db0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -163,7 +163,6 @@ struct request { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; void *completion_data; - int error_count; /* for legacy drivers, don't use */ }; From 9dc4241bb14afecd16518a0760bceb3d7359b12a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 7 May 2022 15:31:16 +0200 Subject: [PATCH 273/491] Revert "parisc: Mark cr16 CPU clocksource unstable on all SMP machines" This reverts commit afdb4a5b1d340e4afffc65daa21cc71890d7d589. It triggers RCU stalls at boot with a 32-bit kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin Cc: stable@vger.kernel.org # v5.16+ --- arch/parisc/kernel/time.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index bb27dfeeddfcc..95ee9e1a364b5 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -251,16 +251,30 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs, even if - * they share the same socket. + * The cr16 interval timers are not syncronized across CPUs on + * different sockets, so mark them unstable and lower rating on + * multi-socket SMP systems. */ if (num_online_cpus() > 1 && !running_on_qemu) { - /* mark sched_clock unstable */ - clear_sched_clock_stable(); - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; + int cpu; + unsigned long cpu0_loc; + cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + if ((cpu0_loc != 0) && + (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) + continue; + + /* mark sched_clock unstable */ + clear_sched_clock_stable(); + + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + break; + } } /* register at clocksource framework */ From 7962c0896429af2a0e00ec6bc15d992536453b2d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 7 May 2022 15:32:38 +0200 Subject: [PATCH 274/491] Revert "parisc: Mark sched_clock unstable only if clocks are not syncronized" This reverts commit d97180ad68bdb7ee10f327205a649bc2f558741d. It triggers RCU stalls at boot with a 32-bit kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin Cc: stable@vger.kernel.org # v5.15+ --- arch/parisc/kernel/setup.c | 2 ++ arch/parisc/kernel/time.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index b91cb45ffd4e3..f005ddedb50e4 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -161,6 +161,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PA11 dma_ops_init(); #endif + + clear_sched_clock_stable(); } /* diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 95ee9e1a364b5..19c31a72fe764 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -267,9 +267,6 @@ static int __init init_cr16_clocksource(void) (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) continue; - /* mark sched_clock unstable */ - clear_sched_clock_stable(); - clocksource_cr16.name = "cr16_unstable"; clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; clocksource_cr16.rating = 0; @@ -277,6 +274,10 @@ static int __init init_cr16_clocksource(void) } } + /* XXX: We may want to mark sched_clock stable here if cr16 clocks are + * in sync: + * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ + /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); From 6c800d7f55fcd78e17deae5ae4374d8e73482c13 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 10:18:40 +0200 Subject: [PATCH 275/491] Revert "parisc: Fix patch code locking and flushing" This reverts commit a9fe7fa7d874a536e0540469f314772c054a0323. Leads to segfaults on 32bit kernel. Signed-off-by: Helge Deller --- arch/parisc/kernel/patch.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index e59574f65e641..80a0ab372802d 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,7 +40,10 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - raw_spin_lock_irqsave(&patch_lock, *flags); + if (flags) + raw_spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -49,7 +52,10 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - raw_spin_unlock_irqrestore(&patch_lock, *flags); + if (flags) + raw_spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -61,9 +67,8 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_dcache_range_asm(start, end); - flush_kernel_icache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_kernel_vmap_range(addr, len); + flush_icache_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -76,10 +81,8 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_dcache_range_asm((unsigned long)fixmap, - (unsigned long)p); - flush_tlb_kernel_range((unsigned long)fixmap, - (unsigned long)p); + flush_kernel_vmap_range((void *)fixmap, + (p-fixmap) * sizeof(*p)); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -87,10 +90,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); - flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); + flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); + flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) From 0921244f6f4f0d05698b953fe632a99b38907226 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 09:19:11 +0200 Subject: [PATCH 276/491] parisc: Only list existing CPUs in cpu_possible_mask The inventory knows which CPUs are in the system, so this bitmask should be in cpu_possible_mask instead of the bitmask based on CONFIG_NR_CPUS. Reset the cpu_possible_mask before scanning the system for CPUs, and mark each existing CPU as possible during initialization of that CPU. This avoids those warnings later on too: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Noticed-by: John David Anglin --- arch/parisc/kernel/processor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index d98692115221a..9e92b76b0ce02 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,6 +171,7 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; + set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -461,6 +462,13 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { + unsigned int cpu; + reset_cpu_topology(); + + /* reset possible mask. We will mark those which are possible. */ + for_each_possible_cpu(cpu) + set_cpu_possible(cpu, false); + register_parisc_driver(&cpu_driver); } From 7e93a3dd63db2341d094ab1d9ba29b5d8d5093d1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 18:55:27 +0200 Subject: [PATCH 277/491] parisc: Update 32- and 64-bit defconfigs Enable CONFIG_CGROUPS=y on 32-bit defconfig for systemd-support, and enable CONFIG_NAMESPACES and CONFIG_USER_NS. Signed-off-by: Helge Deller --- arch/parisc/configs/generic-32bit_defconfig | 4 +++- arch/parisc/configs/generic-64bit_defconfig | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index a5fee10d76ee6..8ce0ae3706804 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -6,6 +6,9 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 +CONFIG_CGROUPS=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y @@ -47,7 +50,6 @@ CONFIG_PARPORT=y CONFIG_PARPORT_PC=m CONFIG_PARPORT_1284=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=6144 CONFIG_BLK_DEV_SD=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 1b8fd80cbe7f8..57501b0aed92e 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -16,6 +16,7 @@ CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_CGROUP_PIDS=y CONFIG_CPUSETS=y +CONFIG_USER_NS=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -267,9 +268,9 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRC_CCITT=m CONFIG_LIBCRC32C=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_SCHED_DEBUG is not set From 1955c4f879a130c7822f483cf593338ad747aed4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 22:24:20 +0200 Subject: [PATCH 278/491] parisc: Re-enable GENERIC_CPU_DEVICES for !SMP In commit 62773112acc5 ("parisc: Switch from GENERIC_CPU_DEVICES to GENERIC_ARCH_TOPOLOGY") GENERIC_CPU_DEVICES was unconditionally turned off, but this triggers a warning in topology_add_dev(). Turning it back on for the !SMP case avoids this warning. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Fixes: 62773112acc5 ("parisc: Switch from GENERIC_CPU_DEVICES to GENERIC_ARCH_TOPOLOGY") Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 52e550b456924..bd22578859d00 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -38,6 +38,7 @@ config PARISC select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_CPU_DEVICES if !SMP select GENERIC_LIB_DEVMEM_IS_ALLOWED select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE From 5b89966bc96a06f6ad65f64ae4b0461918fcc9d3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 3 Apr 2022 21:57:51 +0200 Subject: [PATCH 279/491] parisc: Merge model and model name into one line in /proc/cpuinfo The Linux tool "lscpu" shows the double amount of CPUs if we have "model" and "model name" in two different lines in /proc/cpuinfo. This change combines the model and the model name into one line. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- arch/parisc/kernel/processor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 9e92b76b0ce02..26eb568f8b961 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -420,8 +420,7 @@ show_cpuinfo (struct seq_file *m, void *v) } seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities); - seq_printf(m, "model\t\t: %s\n" - "model name\t: %s\n", + seq_printf(m, "model\t\t: %s - %s\n", boot_cpu_data.pdc.sys_model_name, cpuinfo->dev ? cpuinfo->dev->name : "Unknown"); From 234ff4c585d704896450a3634a7c29fa4e1907e1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 5 Apr 2022 21:28:37 +0200 Subject: [PATCH 280/491] parisc: Change MAX_ADDRESS to become unsigned long long Dave noticed that for the 32-bit kernel MAX_ADDRESS should be a ULL, otherwise this define would become 0: MAX_ADDRESS (1UL << MAX_ADDRBITS) It has no real effect on the kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin --- arch/parisc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 939db6fe620bd..69765a6dbe89d 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -160,7 +160,7 @@ extern void __update_cache(pte_t pte); #define SPACEID_SHIFT (MAX_ADDRBITS - 32) #else #define MAX_ADDRBITS (BITS_PER_LONG) -#define MAX_ADDRESS (1UL << MAX_ADDRBITS) +#define MAX_ADDRESS (1ULL << MAX_ADDRBITS) #define SPACEID_SHIFT 0 #endif From a65bcad5421507c2f6c52e1e2ca6a6ce02fd1ad6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 30 Apr 2022 21:07:18 +0200 Subject: [PATCH 281/491] parisc: Fix typos in comments Various spelling mistakes in comments. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Helge Deller --- arch/parisc/kernel/kprobes.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/parisc/math-emu/dfadd.c | 2 +- arch/parisc/math-emu/dfsub.c | 2 +- arch/parisc/math-emu/sfadd.c | 2 +- arch/parisc/math-emu/sfsub.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index 3343d2fb78897..6e0b86652f30d 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -152,7 +152,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs) /* for absolute branch instructions we can copy iaoq_b. for relative * branch instructions we need to calculate the new address based on the * difference between iaoq_f and iaoq_b. We cannot use iaoq_b without - * modificationt because it's based on our ainsn.insn address. + * modifications because it's based on our ainsn.insn address. */ if (p->post_handler) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index a6e61cf2cad04..b78f1b9d45c18 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -469,7 +469,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o * panic notifiers, and we should call panic * directly from the location that we wish. * e.g. We should not call panic from - * parisc_terminate, but rather the oter way around. + * parisc_terminate, but rather the other way around. * This hack works, prints the panic message twice, * and it enables reboot timers! */ diff --git a/arch/parisc/math-emu/dfadd.c b/arch/parisc/math-emu/dfadd.c index ec487e07f004f..00e561d4aa550 100644 --- a/arch/parisc/math-emu/dfadd.c +++ b/arch/parisc/math-emu/dfadd.c @@ -253,7 +253,7 @@ dbl_fadd( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/dfsub.c b/arch/parisc/math-emu/dfsub.c index c4f30acf2d48d..4f03782284bd0 100644 --- a/arch/parisc/math-emu/dfsub.c +++ b/arch/parisc/math-emu/dfsub.c @@ -256,7 +256,7 @@ dbl_fsub( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/sfadd.c b/arch/parisc/math-emu/sfadd.c index 838758279d5bd..9b98c874dfac7 100644 --- a/arch/parisc/math-emu/sfadd.c +++ b/arch/parisc/math-emu/sfadd.c @@ -249,7 +249,7 @@ sgl_fadd( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/sfsub.c b/arch/parisc/math-emu/sfsub.c index 583d3ace46346..29d9eed09d12d 100644 --- a/arch/parisc/math-emu/sfsub.c +++ b/arch/parisc/math-emu/sfsub.c @@ -252,7 +252,7 @@ sgl_fsub( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { From 340233dcc0160aafcce46ca893d1679f16acf409 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 18:25:00 +0200 Subject: [PATCH 282/491] parisc: Mark cr16 clock unstable on all SMP machines The cr16 interval timers are not synchronized across CPUs, even with just one dual-core CPU. This becomes visible if the machines have a longer uptime. Signed-off-by: Helge Deller --- arch/parisc/kernel/time.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 19c31a72fe764..9714fbd7c42d6 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -251,33 +251,14 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not synchronized across CPUs. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } - /* XXX: We may want to mark sched_clock stable here if cr16 clocks are - * in sync: - * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ - /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); From ba0c04104082ca211e108dd8eec6db2ad7676528 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 19:55:13 +0200 Subject: [PATCH 283/491] Revert "parisc: Increase parisc_cache_flush_threshold setting" This reverts commit a58e9d0984e8dad53f17ec73ae3c1cc7f8d88151. Triggers segfaults with 32-bit kernels on PA8500 machines. Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 23348199f3f80..e7911225a4f89 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -403,7 +403,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size; - unsigned long threshold, threshold2; + unsigned long threshold; alltime = mfctl(16); flush_data_cache(); @@ -418,20 +418,8 @@ void __init parisc_setup_cache_timing(void) alltime, size, rangetime); threshold = L1_CACHE_ALIGN(size * alltime / rangetime); - - /* - * The threshold computed above isn't very reliable since the - * flush times depend greatly on the percentage of dirty lines - * in the flush range. Further, the whole cache time doesn't - * include the time to refill lines that aren't in the mm/vma - * being flushed. By timing glibc build and checks on mako cpus, - * the following formula seems to work reasonably well. The - * value from the timing calculation is too small, and increases - * build and check times by almost a factor two. - */ - threshold2 = cache_info.dc_size * num_online_cpus(); - if (threshold2 > threshold) - threshold = threshold2; + if (threshold > cache_info.dc_size) + threshold = cache_info.dc_size; if (threshold) parisc_cache_flush_threshold = threshold; printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", From c5eb0a61238dd6faf37f58c9ce61c9980aaffd7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 May 2022 13:54:17 -0700 Subject: [PATCH 284/491] Linux 5.18-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a820c525b861..2284d1ca25039 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Superb Owl # *DOCUMENTATION* From 183d4f2d23acecb29695f4d07427c8594e3a9691 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 28 Apr 2022 13:29:11 -0700 Subject: [PATCH 285/491] perf bench: Fix two numa NDEBUG warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG_ON is a no-op if NDEBUG is defined, otherwise it is an assert. Compiling with NDEBUG yields: bench/numa.c: In function ‘bind_to_cpu’: bench/numa.c:314:1: error: control reaches end of non-void function [-Werror=return-type] 314 | } | ^ bench/numa.c: In function ‘bind_to_node’: bench/numa.c:367:1: error: control reaches end of non-void function [-Werror=return-type] 367 | } | ^ Add return statements to cover this case. Reviewed-by: Athira Jajeev Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220428202912.1056444-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 44e1f8a44087e..d5289fa58a4f1 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -311,6 +311,7 @@ static cpu_set_t *bind_to_cpu(int target_cpu) /* BUG_ON due to failure in allocation of orig_mask/mask */ BUG_ON(-1); + return NULL; } static cpu_set_t *bind_to_node(int target_node) @@ -364,6 +365,7 @@ static cpu_set_t *bind_to_node(int target_node) /* BUG_ON due to failure in allocation of orig_mask/mask */ BUG_ON(-1); + return NULL; } static void bind_to_cpumask(cpu_set_t *mask) From 45fa7c38696bae632310c2876ba81fdfa25cc9c2 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 28 Apr 2022 10:19:47 -0500 Subject: [PATCH 286/491] perf tests: Fix coresight `perf test` failure. Currently the `perf test` always fails the coresight test like: 89: Check Arm CoreSight trace data recording and synthesized samples: FAILED! That is because the test_arm_coresight.sh is attempting to SIGINT the parent but is using $$ rather than $PPID and it sigint's itself when run under the perf test framework. Since this is done in a trap clause it ends up returning a non zero return. Since $PPID is a bash ism and not all distros are linking /bin/sh to bash, the alternative parent pid lookups are uglier than just dropping the kill, and its not strictly needed, lets pick the simple solution and drop the sigint. Fixes: 133fe2e617e48ca0 ("perf tests: Improve temp file cleanup in test_arm_coresight.sh") Reviewed-by: James Clark Signed-off-by: Jeremy Linton Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Jeremy Linton Link: https://lore.kernel.org/r/20220428151947.290146-1-jeremy.linton@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 6de53b7ef5ffd..e4cb4f1806ffa 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -29,7 +29,6 @@ cleanup_files() rm -f ${file} rm -f "${perfdata}.old" trap - exit term int - kill -2 $$ exit $glb_err } From 474e76c4075c10461c4373d932a083e25d6adf3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:39:02 -0300 Subject: [PATCH 287/491] tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: d495f942f40aa412 ("KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the the 'perf trace' ioctl syscall argument beautifiers. This is also by now used by tools/testing/selftests/kvm/, a simple test build succeeded. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Link: http://lore.kernel.org/lkml/YnE5BIweGmCkpOTN@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 91a6fe4e02c08..6a184d260c7f2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -445,7 +445,13 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 #ifdef KVM_CAP_IRQ_ROUTING From 3220c3b2115102bb35f8f07d90d2989a3f5eb452 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 20 Apr 2022 11:57:20 +0200 Subject: [PATCH 288/491] drm/i915: Fix race in __i915_vma_remove_closed i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma. Instead the lock needs to be taken before actually checking. v2: move struct declaration Cc: Chris Wilson Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: # v5.3+ Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock") Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220420095720.3331609-1-kherbst@redhat.com (cherry picked from commit 1df1c79cbb7ac9bf148930be3418973c76ba8dde) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 94fcdb7bd21d3..eeaa8d0d04075 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1605,17 +1605,17 @@ void i915_vma_close(struct i915_vma *vma) static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - - spin_lock_irq(>->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(>->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) { + struct intel_gt *gt = vma->vm->gt; + + spin_lock_irq(>->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); } void i915_vma_release(struct kref *ref) @@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1650,7 +1651,9 @@ static void release_references(struct i915_vma *vma) rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); + spin_lock_irq(>->closed_lock); __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); __i915_vma_put(vma); } From 49e6123c65dac6393b04f39ceabf79c44f66b8be Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 4 May 2022 12:32:27 +0000 Subject: [PATCH 289/491] net: sfc: fix memory leak due to ptp channel It fixes memory leak in ring buffer change logic. When ring buffer size is changed(ethtool -G eth0 rx 4096), sfc driver works like below. 1. stop all channels and remove ring buffers. 2. allocates new buffer array. 3. allocates rx buffers. 4. start channels. While the above steps are working, it skips some steps if the channel doesn't have a ->copy callback function. Due to ptp channel doesn't have ->copy callback, these above steps are skipped for ptp channel. It eventually makes some problems. a. ptp channel's ring buffer size is not changed, it works only 1024(default). b. memory leak. The reason for memory leak is to use the wrong ring buffer values. There are some values, which is related to ring buffer size. a. efx->rxq_entries - This is global value of rx queue size. b. rx_queue->ptr_mask - used for access ring buffer as circular ring. - roundup_pow_of_two(efx->rxq_entries) - 1 c. rx_queue->max_fill - efx->rxq_entries - EFX_RXD_HEAD_ROOM These all values should be based on ring buffer size consistently. But ptp channel's values are not. a. efx->rxq_entries - This is global(for sfc) value, always new ring buffer size. b. rx_queue->ptr_mask - This is always 1023(default). c. rx_queue->max_fill - This is new ring buffer size - EFX_RXD_HEAD_ROOM. Let's assume we set 4096 for rx ring buffer, normal channel ptp channel efx->rxq_entries 4096 4096 rx_queue->ptr_mask 4095 1023 rx_queue->max_fill 4086 4086 sfc driver allocates rx ring buffers based on these values. When it allocates ptp channel's ring buffer, 4086 ring buffers are allocated then, these buffers are attached to the allocated array. But ptp channel's ring buffer array size is still 1024(default) and ptr_mask is still 1023 too. So, 3062 ring buffers will be overwritten to the array. This is the reason for memory leak. Test commands: ethtool -G rx 4096 while : do ip link set up ip link set down done In order to avoid this problem, it adds ->copy callback to ptp channel type. So that rx_queue->ptr_mask value will be updated correctly. Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 7 ++++++- drivers/net/ethernet/sfc/ptp.c | 14 +++++++++++++- drivers/net/ethernet/sfc/ptp.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 377df8b7f0159..40df910aa1401 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx) int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) { - struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; + struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel, + *ptp_channel = efx_ptp_channel(efx); + struct efx_ptp_data *ptp_data = efx->ptp_data; unsigned int i, next_buffer_table = 0; u32 old_rxq_entries, old_txq_entries; int rc, rc2; @@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) efx_set_xdp_channels(efx); out: + efx->ptp_data = NULL; /* Destroy unused channel structures */ for (i = 0; i < efx->n_channels; i++) { channel = other_channel[i]; @@ -948,6 +951,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) } } + efx->ptp_data = ptp_data; rc2 = efx_soft_enable_interrupts(efx); if (rc2) { rc = rc ? rc : rc2; @@ -966,6 +970,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) efx->txq_entries = old_txq_entries; for (i = 0; i < efx->n_channels; i++) swap(efx->channel[i], other_channel[i]); + efx_ptp_update_channel(efx, ptp_channel); goto out; } diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index f0ef515e2ade5..4625f85acab2e 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -45,6 +45,7 @@ #include "farch_regs.h" #include "tx.h" #include "nic.h" /* indirectly includes ptp.h */ +#include "efx_channels.h" /* Maximum number of events expected to make up a PTP event */ #define MAX_EVENT_FRAGS 3 @@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx) return efx->ptp_data ? efx->ptp_data->channel : NULL; } +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel) +{ + if (efx->ptp_data) + efx->ptp_data->channel = channel; +} + static u32 last_sync_timestamp_major(struct efx_nic *efx) { struct efx_channel *channel = efx_ptp_channel(efx); @@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) int rc = 0; unsigned int pos; + if (efx->ptp_data) { + efx->ptp_data->channel = channel; + return 0; + } + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); efx->ptp_data = ptp; if (!efx->ptp_data) @@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = { .pre_probe = efx_ptp_probe_channel, .post_remove = efx_ptp_remove_channel, .get_name = efx_ptp_get_channel_name, - /* no copy operation; there is no need to reallocate this channel */ + .copy = efx_copy_channel, .receive_skb = efx_ptp_rx, .want_txqs = efx_ptp_want_txqs, .keep_eventq = false, diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h index 9855e8c9e544d..7b1ef7002b3f0 100644 --- a/drivers/net/ethernet/sfc/ptp.h +++ b/drivers/net/ethernet/sfc/ptp.h @@ -16,6 +16,7 @@ struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_ptp_channel(struct efx_nic *efx); +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_remove(struct efx_nic *efx); int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); From cf3ab8d4a797960b4be20565abb3bcd227b18a68 Mon Sep 17 00:00:00 2001 From: Lina Wang Date: Thu, 5 May 2022 13:48:49 +0800 Subject: [PATCH 290/491] net: fix wrong network header length When clatd starts with ebpf offloaing, and NETIF_F_GRO_FRAGLIST is enable, several skbs are gathered in skb_shinfo(skb)->frag_list. The first skb's ipv6 header will be changed to ipv4 after bpf_skb_proto_6_to_4, network_header\transport_header\mac_header have been updated as ipv4 acts, but other skbs in frag_list didnot update anything, just ipv6 packets. udp_queue_rcv_skb will call skb_segment_list to traverse other skbs in frag_list and make sure right udp payload is delivered to user space. Unfortunately, other skbs in frag_list who are still ipv6 packets are updated like the first skb and will have wrong transport header length. e.g.before bpf_skb_proto_6_to_4,the first skb and other skbs in frag_list has the same network_header(24)& transport_header(64), after bpf_skb_proto_6_to_4, ipv6 protocol has been changed to ipv4, the first skb's network_header is 44,transport_header is 64, other skbs in frag_list didnot change.After skb_segment_list, the other skbs in frag_list has different network_header(24) and transport_header(44), so there will be 20 bytes different from original,that is difference between ipv6 header and ipv4 header. Just change transport_header to be the same with original. Actually, there are two solutions to fix it, one is traversing all skbs and changing every skb header in bpf_skb_proto_6_to_4, the other is modifying frag_list skb's header in skb_segment_list. Considering efficiency, adopt the second one--- when the first skb and other skbs in frag_list has different network_header length, restore them to make sure right udp payload is delivered to user space. Signed-off-by: Lina Wang Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 30b523fa4ad2e..c90c74de90d5a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; - int err; + int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); From edae34a3ed9293b5077dddf9e51a3d86c95dc76a Mon Sep 17 00:00:00 2001 From: Lina Wang Date: Thu, 5 May 2022 13:48:50 +0800 Subject: [PATCH 291/491] selftests net: add UDP GRO fraglist + bpf self-tests When NET_F_F_GRO_FRAGLIST is enabled and bpf_skb_change_proto is used, check if udp packets and tcp packets are successfully delivered to user space. If wrong udp packets are delivered, udpgso_bench_rx will exit with "Initial byte out of range" Signed-off-by: Maciej enczykowski Signed-off-by: Lina Wang Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 3 + tools/testing/selftests/net/bpf/Makefile | 14 + tools/testing/selftests/net/bpf/nat6to4.c | 285 ++++++++++++++++++ tools/testing/selftests/net/udpgro_frglist.sh | 101 +++++++ 4 files changed, 403 insertions(+) create mode 100644 tools/testing/selftests/net/bpf/Makefile create mode 100644 tools/testing/selftests/net/bpf/nat6to4.c create mode 100755 tools/testing/selftests/net/udpgro_frglist.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0f2ebc38d8934..e1f998defd107 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh +TEST_PROGS += udpgro_frglist.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh @@ -61,6 +62,8 @@ TEST_FILES := settings KSFT_KHDR_INSTALL := 1 include ../lib.mk +include bpf/Makefile + $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile new file mode 100644 index 0000000000000..f91bf14bbee7b --- /dev/null +++ b/tools/testing/selftests/net/bpf/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +CLANG ?= clang +CCINCLUDE += -I../../bpf +CCINCLUDE += -I../../../../../usr/include/ + +TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o +all: $(TEST_CUSTOM_PROGS) + +$(OUTPUT)/%.o: %.c + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ + +clean: + rm -f $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c new file mode 100644 index 0000000000000..ac54c36b25fc8 --- /dev/null +++ b/tools/testing/selftests/net/bpf/nat6to4.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code is taken from the Android Open Source Project and the author + * (Maciej Żenczykowski) has gave permission to relicense it under the + * GPLv2. Therefore this program is free software; + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 2 as published by the Free Software + * Foundation + + * The original headers, including the original license headers, are + * included below for completeness. + * + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +#include +#include + +#define IP_DF 0x4000 // Flag: "Don't Fragment" + +SEC("schedcls/ingress6/nat_6") +int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr * const eth = data; // used iff is_ethernet + const struct ipv6hdr * const ip6 = (void *)(eth + 1); + + // Require ethernet dst mac address to be our unicast address. + if (skb->pkt_type != PACKET_HOST) + return TC_ACT_OK; + + // Must be meta-ethernet IPv6 frame + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // Must have (ethernet and) ipv6 header + if (data + l2_header_size + sizeof(*ip6) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv6 + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // IP version must be 6 + if (ip6->version != 6) + return TC_ACT_OK; + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) + return TC_ACT_OK; + switch (ip6->nexthdr) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_UDP: // address means there is no need to update their checksums. + case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, + case IPPROTO_ESP: // since there is never a checksum to update. + break; + default: // do not know how to handle anything else + return TC_ACT_OK; + } + + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype + + struct iphdr ip = { + .version = 4, // u4 + .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 + .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 + .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 + .id = 0, // u16 + .frag_off = bpf_htons(IP_DF), // u16 + .ttl = ip6->hop_limit, // u8 + .protocol = ip6->nexthdr, // u8 + .check = 0, // u16 + .saddr = 0x0201a8c0, // u32 + .daddr = 0x0101a8c0, // u32 + }; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) + sum4 += ((__u16 *)&ip)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF + + // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) + for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) + sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation + + // Note that there is no L4 checksum update: we are relying on the checksum neutrality + // of the ipv6 address chosen by netd's ClatdController. + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) + return TC_ACT_OK; + bpf_csum_update(skb, sum6); + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + if (data + l2_header_size + sizeof(struct iphdr) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + + // Copy over the new ipv4 header. + *(struct iphdr *)(new_eth + 1) = ip; + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +SEC("schedcls/egress4/snat4") +int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr *const eth = data; // used iff is_ethernet + const struct iphdr *const ip4 = (void *)(eth + 1); + + // Must be meta-ethernet IPv4 frame + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // Must have ipv4 header + if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv4 + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // IP version must be 4 + if (ip4->version != 4) + return TC_ACT_OK; + + // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header + if (ip4->ihl != 5) + return TC_ACT_OK; + + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) + return TC_ACT_OK; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) + sum4 += ((__u16 *)ip4)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF + if (sum4 != 0xFFFF) + return TC_ACT_OK; + + // Minimum IPv4 total length is the size of the header + if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) + return TC_ACT_OK; + + // We are incapable of dealing with IPv4 fragments + if (ip4->frag_off & ~bpf_htons(IP_DF)) + return TC_ACT_OK; + + switch (ip4->protocol) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_GRE: // address means there is no need to update their checksums. + case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, + break; // since there is never a checksum to update. + + case IPPROTO_UDP: // See above comment, but must also have UDP header... + if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) + return TC_ACT_OK; + const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); + // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the + // checksum. Otherwise the network or more likely the NAT64 gateway might + // drop the packet because in most cases IPv6/UDP packets with a zero checksum + // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff() + if (!uh->check) + return TC_ACT_OK; + break; + + default: // do not know how to handle anything else + return TC_ACT_OK; + } + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype + + struct ipv6hdr ip6 = { + .version = 6, // __u8:4 + .priority = ip4->tos >> 4, // __u8:4 + .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] + .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 + .nexthdr = ip4->protocol, // __u8 + .hop_limit = ip4->ttl, // __u8 + }; + ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.saddr.in6_u.u6_addr32[1] = 0; + ip6.saddr.in6_u.u6_addr32[2] = 0; + ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); + ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.daddr.in6_u.u6_addr32[1] = 0; + ip6.daddr.in6_u.u6_addr32[2] = 0; + ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); + + // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6.version == 6 + for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) + sum6 += ((__u16 *)&ip6)[i]; + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) + return TC_ACT_OK; + + // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. + // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, + // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. + // However, we've already verified the ipv4 checksum is correct and thus 0. + // Thus we only need to add the ipv6 header's sum. + // + // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error + // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). + bpf_csum_update(skb, sum6); + + // bpf_skb_change_proto() invalidates all pointers - reload them. + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + // I cannot think of any valid way for this error condition to trigger, however I do + // believe the explicit check is required to keep the in kernel ebpf verifier happy. + if (data + l2_header_size + sizeof(ip6) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + // Copy over the new ipv4 header. + *(struct ipv6hdr *)(new_eth + 1) = ip6; + return TC_ACT_OK; +} + +char _license[] SEC("license") = ("GPL"); diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh new file mode 100755 index 0000000000000..807b74c8fd80f --- /dev/null +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local rx_args=${all#*rx} + + + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on + + + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + tc -n "${PEER_NS}" qdisc add dev veth1 clsact + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action + echo ${rx_args} + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + echo ${args} + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -u -S 0 rx -4 -v + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S 0 rx -4 -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx -4 -t +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv6" + run_tcp "${ipv6_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [ ! -f bpf/nat6to4.o ]; then + echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi From ceaf69f8eadcafb323392be88e7a5248c415d423 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 7 May 2022 11:00:28 +0300 Subject: [PATCH 292/491] fanotify: do not allow setting dirent events in mask of non-dir Dirent events (create/delete/move) are only reported on watched directory inodes, but in fanotify as well as in legacy inotify, it was always allowed to set them on non-dir inode, which does not result in any meaningful outcome. Until kernel v5.17, dirent events in fanotify also differed from events "on child" (e.g. FAN_OPEN) in the information provided in the event. For example, FAN_OPEN could be set in the mask of a non-dir or the mask of its parent and event would report the fid of the child regardless of the marked object. By contrast, FAN_DELETE is not reported if the child is marked and the child fid was not reported in the events. Since kernel v5.17, with fanotify group flag FAN_REPORT_TARGET_FID, the fid of the child is reported with dirent events, like events "on child", which may create confusion for users expecting the same behavior as events "on child" when setting events in the mask on a child. The desired semantics of setting dirent events in the mask of a child are not clear, so for now, deny this action for a group initialized with flag FAN_REPORT_TARGET_FID and for the new event FAN_RENAME. We may relax this restriction in the future if we decide on the semantics and implement them. Fixes: d61fd650e9d2 ("fanotify: introduce group flag FAN_REPORT_TARGET_FID") Fixes: 8cc3b1ccd930 ("fanotify: wire up FAN_RENAME event") Link: https://lore.kernel.org/linux-fsdevel/20220505133057.zm5t6vumc4xdcnsg@quack3.lan/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220507080028.219826-1-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9b32b76a9c303..a792e21c53099 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1657,6 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; + /* + * FAN_RENAME is not allowed on non-dir (for now). + * We shouldn't have allowed setting any dirent events in mask of + * non-dir, but because we always allowed it, error only if group + * was initialized with the new flag FAN_REPORT_TARGET_FID. + */ + ret = -ENOTDIR; + if (inode && !S_ISDIR(inode->i_mode) && + ((mask & FAN_RENAME) || + ((mask & FANOTIFY_DIRENT_EVENTS) && + FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) + goto path_put_and_out; + /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; From c7e34c1e263f71b2dd78a12b6b7259a89b3a1f44 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 6 May 2022 11:42:12 +0300 Subject: [PATCH 293/491] mailmap: update Kalle Valo's email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I switched to use my kernel.org address, the old kvalo@codeaurora.org address doesn't work anymore. Signed-off-by: Kalle Valo Tested-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220506084212.8952-1-kvalo@kernel.org --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index b9d3582175864..2c05d72575098 100644 --- a/.mailmap +++ b/.mailmap @@ -204,6 +204,7 @@ Juha Yrjola Juha Yrjola Juha Yrjola Julien Thierry +Kalle Valo Kalyan Thota Kay Sievers Kees Cook From a59d55568d02bbbdf9c0cc15be9580180f855b4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 May 2022 23:04:21 +0200 Subject: [PATCH 294/491] mac80211_hwsim: fix RCU protected chanctx access We need to RCU protect the chanctx_conf access, so do that. Fixes: 585625c955b1 ("mac80211_hwsim: check TX and STA bandwidth") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220505230421.fb8055c081a2.Ic6da3307c77a909bd61a0ea25dc2a4b08fe1b03f@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 28bfa7b7b73c0..3ac3693dbecb6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2202,11 +2202,14 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw, if (!data->use_chanctx) { confbw = data->bw; } else { - struct ieee80211_chanctx_conf *chanctx_conf = - rcu_dereference(vif->chanctx_conf); + struct ieee80211_chanctx_conf *chanctx_conf; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(vif->chanctx_conf); if (!WARN_ON(!chanctx_conf)) confbw = chanctx_conf->def.width; + rcu_read_unlock(); } WARN(bw > hwsim_get_chanwidth(confbw), From 9e2db50f1ef2238fc2f71c5de1c0418b7a5b0ea2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 May 2022 23:04:22 +0200 Subject: [PATCH 295/491] mac80211_hwsim: call ieee80211_tx_prepare_skb under RCU protection This is needed since it might use (and pass out) pointers to e.g. keys protected by RCU. Can't really happen here as the frames aren't encrypted, but we need to still adhere to the rules. Fixes: cacfddf82baf ("mac80211_hwsim: initialize ieee80211_tx_info at hw_scan_work") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220505230421.5f139f9de173.I77ae111a28f7c0e9fd1ebcee7f39dbec5c606770@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3ac3693dbecb6..e9ec63e0e395b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2478,11 +2478,13 @@ static void hw_scan_work(struct work_struct *work) if (req->ie_len) skb_put_data(probe, req->ie, req->ie_len); + rcu_read_lock(); if (!ieee80211_tx_prepare_skb(hwsim->hw, hwsim->hw_scan_vif, probe, hwsim->tmp_chan->band, NULL)) { + rcu_read_unlock(); kfree_skb(probe); continue; } @@ -2490,6 +2492,7 @@ static void hw_scan_work(struct work_struct *work) local_bh_disable(); mac80211_hwsim_tx_frame(hwsim->hw, probe, hwsim->tmp_chan); + rcu_read_unlock(); local_bh_enable(); } } From f971e1887fdb3ab500c9bebf4b98f62d49a20655 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 May 2022 10:21:38 +0200 Subject: [PATCH 296/491] nl80211: fix locking in nl80211_set_tx_bitrate_mask() This accesses the wdev's chandef etc., so cannot safely be used without holding the lock. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220506102136.06b7205419e6.I2a87c05fbd8bc5e565e84d190d4cfd2e92695a90@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aa6094c3c9b00..1a3551b6d18bb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11666,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct cfg80211_bitrate_mask mask; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; + wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true); if (err) - return err; + goto out; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); +out: + wdev_unlock(wdev); + return err; } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) From a36e07dfe6ee71e209383ea9288cd8d1617e14f9 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Fri, 6 May 2022 17:24:54 +0000 Subject: [PATCH 297/491] rfkill: uapi: fix RFKILL_IOCTL_MAX_SIZE ioctl request definition The definition of RFKILL_IOCTL_MAX_SIZE introduced by commit 54f586a91532 ("rfkill: make new event layout opt-in") is unusable since it is based on RFKILL_IOC_EXT_SIZE which has not been defined. Fix that by replacing the undefined constant with the constant which is intended to be used in this definition. Fixes: 54f586a91532 ("rfkill: make new event layout opt-in") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20220506172454.120319-1-glebfm@altlinux.org [add commit message provided later by Dmitry] Signed-off-by: Johannes Berg --- include/uapi/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 283c5a7b3f2c8..db6c8588c1d0c 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -184,7 +184,7 @@ struct rfkill_event_ext { #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) #define RFKILL_IOC_MAX_SIZE 2 -#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32) +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) /* and that's all userspace gets */ From fe503887eed6ea528e144ec8dacfa1d47aa701ac Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 29 Apr 2022 17:49:17 +0100 Subject: [PATCH 298/491] slimbus: qcom: Fix IRQ check in qcom_slim_probe platform_get_irq() returns non-zero IRQ number on success, negative error number on failure. And the doc of platform_get_irq() provides a usage example: int irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; Fix the check of return value to catch errors correctly. Fixes: ad7fcbc308b0 ("slimbus: qcom: Add Qualcomm Slimbus controller driver") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220429164917.5202-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index f04b961b96cd4..ec58091fc948a 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev) } ctrl->irq = platform_get_irq(pdev, 0); - if (!ctrl->irq) { + if (ctrl->irq < 0) { dev_err(&pdev->dev, "no slimbus IRQ\n"); - return -ENODEV; + return ctrl->irq; } sctrl = &ctrl->ctrl; From 085d16d5f949b64713d5e960d6c9bbf51bc1d511 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Sun, 8 May 2022 15:54:50 +0300 Subject: [PATCH 299/491] nfs: fix broken handling of the softreval mount option Turns out that ever since this mount option was added, passing `softreval` in NFS mount options cancelled all other flags while not affecting the underlying flag `NFS_MOUNT_SOFTREVAL`. Fixes: c74dfe97c104 ("NFS: Add mount option 'softreval'") Signed-off-by: Dan Aloni Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index e2d59bb5e6bbe..9a16897e8dc6b 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -517,7 +517,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (result.negated) ctx->flags &= ~NFS_MOUNT_SOFTREVAL; else - ctx->flags &= NFS_MOUNT_SOFTREVAL; + ctx->flags |= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) From e4b1045bf9cfec6f70ac6d3783be06c3a88dcb25 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 11:40:40 +0800 Subject: [PATCH 300/491] ionic: fix missing pci_release_regions() on error in ionic_probe() If ionic_map_bars() fails, pci_release_regions() need be called. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506034040.2614129-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 6ffc62c411655..0a7a757494bc5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -256,7 +256,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = ionic_map_bars(ionic); if (err) - goto err_out_pci_disable_device; + goto err_out_pci_release_regions; /* Configure the device */ err = ionic_setup(ionic); @@ -360,6 +360,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_unmap_bars: ionic_unmap_bars(ionic); +err_out_pci_release_regions: pci_release_regions(pdev); err_out_pci_disable_device: pci_disable_device(pdev); From 51ca86b4c9c7c75f5630fa0dbe5f8f0bd98e3c3e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 17:42:50 +0800 Subject: [PATCH 301/491] ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one() Fix the missing pci_disable_device() before return from tulip_init_one() in the error handling case. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506094250.3630615-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dec/tulip/tulip_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 79df5a72877b8..0040dcaab9455 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1399,8 +1399,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1785,6 +1787,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } From 4bd46bb037f8e1883dbe1fc9e79896b7f885db3f Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 6 May 2022 15:37:39 -0700 Subject: [PATCH 302/491] ptp: ocp: Use DIV64_U64_ROUND_UP for rounding. The initial code used roundup() to round the starting time to a multiple of a period. This generated an error on 32-bit systems, so was replaced with DIV_ROUND_UP_ULL(). However, this truncates to 32-bits on a 64-bit system. Replace with DIV64_U64_ROUND_UP() instead. Fixes: b325af3cfab9 ("ptp: ocp: Add signal generators and update sysfs nodes") Signed-off-by: Jonathan Lemon Link: https://lore.kernel.org/r/20220506223739.1930-2-jonathan.lemon@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0feaa4b453175..dd45471f67808 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1557,7 +1557,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) start_ns = ktime_set(ts.tv_sec, ts.tv_nsec) + NSEC_PER_MSEC; if (!s->start) { /* roundup() does not work on 32-bit systems */ - s->start = DIV_ROUND_UP_ULL(start_ns, s->period); + s->start = DIV64_U64_ROUND_UP(start_ns, s->period); s->start = ktime_add(s->start, s->phase); } From ee1444b5e1df4155b591d0d9b1e72853a99ea861 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 6 May 2022 18:10:38 -0700 Subject: [PATCH 303/491] dim: initialize all struct fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski --- lib/dim/net_dim.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 06811d866775c..53f6b9c6e9366 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -12,41 +12,41 @@ * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES */ #define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 #define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ + {.usec = 2, .pkts = 256,}, \ + {.usec = 8, .pkts = 128,}, \ + {.usec = 16, .pkts = 64,}, \ + {.usec = 32, .pkts = 64,}, \ + {.usec = 64, .pkts = 64,} \ } #define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ + {.usec = 5, .pkts = 128,}, \ + {.usec = 8, .pkts = 64,}, \ + {.usec = 16, .pkts = 32,}, \ + {.usec = 32, .pkts = 32,}, \ + {.usec = 64, .pkts = 32,} \ } static const struct dim_cq_moder From 151d6dcbed836270c6c240932da66f147950cbdb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 May 2022 16:47:40 -0700 Subject: [PATCH 304/491] hwmon: (ltq-cputemp) restrict it to SOC_XWAY Building with SENSORS_LTQ_CPUTEMP=y with SOC_FALCON=y causes build errors since FALCON does not support the same features as XWAY. Change this symbol to depend on SOC_XWAY since that provides the necessary interfaces. Repairs these build errors: ../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_enable': ../drivers/hwmon/ltq-cputemp.c:23:9: error: implicit declaration of function 'ltq_cgu_w32'; did you mean 'ltq_ebu_w32'? [-Werror=implicit-function-declaration] 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); ../drivers/hwmon/ltq-cputemp.c:23:21: error: implicit declaration of function 'ltq_cgu_r32'; did you mean 'ltq_ebu_r32'? [-Werror=implicit-function-declaration] 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); ../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_probe': ../drivers/hwmon/ltq-cputemp.c:92:31: error: 'SOC_TYPE_VR9_2' undeclared (first use in this function) 92 | if (ltq_soc_type() != SOC_TYPE_VR9_2) Fixes: 7074d0a92758 ("hwmon: (ltq-cputemp) add cpu temp sensor driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Florian Eckert Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Link: https://lore.kernel.org/r/20220509234740.26841-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 68a8a27ab3b70..f2b038fa3b84c 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -960,7 +960,7 @@ config SENSORS_LTC4261 config SENSORS_LTQ_CPUTEMP bool "Lantiq cpu temperature sensor driver" - depends on LANTIQ + depends on SOC_XWAY help If you say yes here you get support for the temperature sensor inside your CPU. From 7d1e6496616275f3830e2f2f91fa69a66953e95b Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 305/491] mm: mremap: fix sign for EFAULT error return value The mremap syscall is supposed to return a pointer to the new virtual memory area on success, and a negative value of the error code in case of failure. Currently, EFAULT is returned when the VMA is not found, instead of -EFAULT. The users of this syscall will therefore believe the syscall succeeded in case the VMA didn't exist, as it returns a pointer to address 0xe (0xe being the value of EFAULT). Fix the sign of the error value. Link: https://lkml.kernel.org/r/20220427224439.23828-2-dossche.niels@gmail.com Fixes: 550a7d60bd5e ("mm, hugepages: add mremap() support for hugepage backed vma") Signed-off-by: Niels Dossche Cc: Mina Almasry Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 303d3290b9386..0b93fac768511 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -947,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, return -EINTR; vma = vma_lookup(mm, addr); if (!vma) { - ret = EFAULT; + ret = -EFAULT; goto out; } From 1927e498aee1757b3df755a194cbfc5cc0f2b663 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 306/491] procfs: prevent unprivileged processes accessing fdinfo dir The file permissions on the fdinfo dir from were changed from S_IRUSR|S_IXUSR to S_IRUGO|S_IXUGO, and a PTRACE_MODE_READ check was added for opening the fdinfo files [1]. However, the ptrace permission check was not added to the directory, allowing anyone to get the open FD numbers by reading the fdinfo directory. Add the missing ptrace permission check for opening the fdinfo directory. [1] https://lkml.kernel.org/r/20210308170651.919148-1-kaleshsingh@google.com Link: https://lkml.kernel.org/r/20210713162008.1056986-1-kaleshsingh@google.com Fixes: 7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ") Signed-off-by: Kalesh Singh Cc: Kees Cook Cc: Eric W. Biederman Cc: Christian Brauner Cc: Suren Baghdasaryan Cc: Hridya Valsaraju Cc: Jann Horn Signed-off-by: Andrew Morton --- fs/proc/fd.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 172c86270b312..913bef0d2a36c 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -72,7 +72,7 @@ static int seq_show(struct seq_file *m, void *v) return 0; } -static int seq_fdinfo_open(struct inode *inode, struct file *file) +static int proc_fdinfo_access_allowed(struct inode *inode) { bool allowed = false; struct task_struct *task = get_proc_task(inode); @@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) if (!allowed) return -EACCES; + return 0; +} + +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + return single_open(file, seq_show, inode); } @@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) proc_fdinfo_instantiate); } +static int proc_open_fdinfo(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + + return 0; +} + const struct inode_operations proc_fdinfo_inode_operations = { .lookup = proc_lookupfdinfo, .setattr = proc_setattr, }; const struct file_operations proc_fdinfo_operations = { + .open = proc_open_fdinfo, .read = generic_read_dir, .iterate_shared = proc_readfdinfo, .llseek = generic_file_llseek, From 260364d112bc822005224667c0c9b1b17a53eafd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 307/491] arm[64]/memremap: don't abuse pfn_valid() to ensure presence of linear map The semantics of pfn_valid() is to check presence of the memory map for a PFN and not whether a PFN is covered by the linear map. The memory map may be present for NOMAP memory regions, but they won't be mapped in the linear mapping. Accessing such regions via __va() when they are memremap()'ed will cause a crash. On v5.4.y the crash happens on qemu-arm with UEFI [1]: <1>[ 0.084476] 8<--- cut here --- <1>[ 0.084595] Unable to handle kernel paging request at virtual address dfb76000 <1>[ 0.084938] pgd = (ptrval) <1>[ 0.085038] [dfb76000] *pgd=5f7fe801, *pte=00000000, *ppte=00000000 ... <4>[ 0.093923] [] (memcpy) from [] (dmi_setup+0x60/0x418) <4>[ 0.094204] [] (dmi_setup) from [] (arm_dmi_init+0x8/0x10) <4>[ 0.094408] [] (arm_dmi_init) from [] (do_one_initcall+0x50/0x228) <4>[ 0.094619] [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1f8) <4>[ 0.094841] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) <4>[ 0.095057] [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) On kernels v5.10.y and newer the same crash won't reproduce on ARM because commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") changed the way memory regions are registered in the resource tree, but that merely covers up the problem. On ARM64 memory resources registered in yet another way and there the issue of wrong usage of pfn_valid() to ensure availability of the linear map is also covered. Implement arch_memremap_can_ram_remap() on ARM and ARM64 to prevent access to NOMAP regions via the linear mapping in memremap(). Link: https://lore.kernel.org/all/Yl65zxGgFzF1Okac@sirena.org.uk Link: https://lkml.kernel.org/r/20220426060107.7618-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reported-by: "kernelci.org bot" Tested-by: Mark Brown Reviewed-by: Ard Biesheuvel Acked-by: Catalin Marinas Cc: Greg Kroah-Hartman Cc: Mark Brown Cc: Mark-PK Tsai Cc: Russell King Cc: Tony Lindgren Cc: Will Deacon Cc: [5.4+] Signed-off-by: Andrew Morton --- arch/arm/include/asm/io.h | 3 +++ arch/arm/mm/ioremap.c | 8 ++++++++ arch/arm64/include/asm/io.h | 4 ++++ arch/arm64/mm/ioremap.c | 8 ++++++++ 4 files changed, 23 insertions(+) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 0c70eb688a00c..2a0739a2350be 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -440,6 +440,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap #endif /* diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index aa08bcb72db93..290702328a337 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -493,3 +493,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 7fd836bea7eb4..3995652daf81a 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b7c81dacabf07..b21f91cd830db 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -99,3 +99,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return pfn_is_map_memory(pfn); +} From 5fcaa7caf5fa7f3ad9eaba1897f226d520f963cc Mon Sep 17 00:00:00 2001 From: Martyna Szapar-Mudlaw Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 308/491] mailmap: add entry for martyna.szapar-mudlaw@intel.com Separate linux.intel.com account was created for submitting and reviewing kernel patches, thus need to map previously used primary Intel e-mail address. Link: https://lkml.kernel.org/r/20220505132624.41802-1-martyna.szapar-mudlaw@linux.intel.com Signed-off-by: Martyna Szapar-Mudlaw Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 93458154ce7d4..36a275ab8c521 100644 --- a/.mailmap +++ b/.mailmap @@ -249,6 +249,7 @@ Mark Yao Martin Kepplinger Martin Kepplinger Martin Kepplinger +Martyna Szapar-Mudlaw Mathieu Othacehe Matthew Wilcox Matthew Wilcox From 2839b0999c20c9f6bf353849c69370e121e2fa1a Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 309/491] mm/kfence: reset PG_slab and memcg_data before freeing __kfence_pool When kfence fails to initialize kfence pool, it frees the pool. But it does not reset memcg_data and PG_slab flag. Below is a BUG because of this. Let's fix it by resetting memcg_data and PG_slab flag before free. [ 0.089149] BUG: Bad page state in process swapper/0 pfn:3d8e06 [ 0.089149] page:ffffea46cf638180 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x3d8e06 [ 0.089150] memcg:ffffffff94a475d1 [ 0.089150] flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) [ 0.089151] raw: 0017ffffc0000200 ffffea46cf638188 ffffea46cf638188 0000000000000000 [ 0.089152] raw: 0000000000000000 0000000000000000 00000000ffffffff ffffffff94a475d1 [ 0.089152] page dumped because: page still charged to cgroup [ 0.089153] Modules linked in: [ 0.089153] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G B W 5.18.0-rc1+ #965 [ 0.089154] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 0.089154] Call Trace: [ 0.089155] [ 0.089155] dump_stack_lvl+0x49/0x5f [ 0.089157] dump_stack+0x10/0x12 [ 0.089158] bad_page.cold+0x63/0x94 [ 0.089159] check_free_page_bad+0x66/0x70 [ 0.089160] __free_pages_ok+0x423/0x530 [ 0.089161] __free_pages_core+0x8e/0xa0 [ 0.089162] memblock_free_pages+0x10/0x12 [ 0.089164] memblock_free_late+0x8f/0xb9 [ 0.089165] kfence_init+0x68/0x92 [ 0.089166] start_kernel+0x789/0x992 [ 0.089167] x86_64_start_reservations+0x24/0x26 [ 0.089168] x86_64_start_kernel+0xa9/0xaf [ 0.089170] secondary_startup_64_no_verify+0xd5/0xdb [ 0.089171] Link: https://lkml.kernel.org/r/YnPG3pQrqfcgOlVa@hyeyoo Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Fixes: 8f0b36497303 ("mm: kfence: fix objcgs vector allocation") Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Marco Elver Reviewed-by: Muchun Song Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kfence/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 9b2b5f56f4aef..11a954763be9c 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -621,6 +621,16 @@ static bool __init kfence_init_pool_early(void) * fails for the first page, and therefore expect addr==__kfence_pool in * most failure cases. */ + for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) { + struct slab *slab = virt_to_slab(p); + + if (!slab) + continue; +#ifdef CONFIG_MEMCG + slab->memcg_data = 0; +#endif + __folio_clear_slab(slab_folio(slab)); + } memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)); __kfence_pool = NULL; return false; From 41c240099fe09377b6b9f8272e45d2267c843d3e Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 310/491] selftests: vm: Makefile: rename TARGETS to VMTARGETS The tools/testing/selftests/vm/Makefile uses the variable TARGETS internally to generate a list of platform-specific binary build targets suffixed with _{32,64}. When building the selftests using its own Makefile directly, such as via the following command run in a kernel tree: One receives an error such as the following: make: Entering directory '/root/linux/tools/testing/selftests' make --no-builtin-rules ARCH=x86 -C ../../.. headers_install make[1]: Entering directory '/root/linux' INSTALL ./usr/include make[1]: Leaving directory '/root/linux' make[1]: Entering directory '/root/linux/tools/testing/selftests/vm' make[1]: *** No rule to make target 'vm.c', needed by '/root/linux/tools/testing/selftests/vm/vm_64'. Stop. make[1]: Leaving directory '/root/linux/tools/testing/selftests/vm' make: *** [Makefile:175: all] Error 2 make: Leaving directory '/root/linux/tools/testing/selftests' The TARGETS variable passed to tools/testing/selftests/Makefile collides with the TARGETS used in tools/testing/selftests/vm/Makefile, so rename the latter to VMTARGETS, eliminating the collision with no functional change. Link: https://lkml.kernel.org/r/20220504213454.1282532-1-jsavitz@redhat.com Fixes: f21fda8f6453 ("selftests: vm: pkeys: fix multilib builds for x86") Signed-off-by: Joel Savitz Acked-by: Nico Pache Cc: Joel Savitz Cc: Shuah Khan Cc: Sandipan Das Cc: Dave Hansen Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 04a49e876a46c..5b1ecd00695b3 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) -TARGETS := protection_keys -BINARIES_32 := $(TARGETS:%=%_32) -BINARIES_64 := $(TARGETS:%=%_64) +VMTARGETS := protection_keys +BINARIES_32 := $(VMTARGETS:%=%_32) +BINARIES_64 := $(VMTARGETS:%=%_64) ifeq ($(CAN_BUILD_WITH_NOPIE),1) CFLAGS += -no-pie @@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave $(BINARIES_32): LDLIBS += -lrt -ldl -lm $(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) +$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) @@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave $(BINARIES_64): LDLIBS += -lrt -ldl $(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) +$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif # x86_64 users should be encouraged to install 32-bit libraries From 9039b8335276569670caaf23606335946625e764 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 311/491] MAINTAINERS: add a mailing list for DAMON development This commit adds an open mailing list for DAMON in MAINTAINERS file. Link: https://lkml.kernel.org/r/20220503180741.137079-1-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2647adf30569d..df6c341aca440 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5438,6 +5438,7 @@ F: net/ax25/sysctl_net_ax25.c DATA ACCESS MONITOR M: SeongJae Park +L: damon@lists.linux.dev L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-damon From 91a7cda1f4b8bdf770000a3b60640576dafe0cec Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 6 May 2022 08:08:15 +0200 Subject: [PATCH 312/491] net: phy: Fix race condition on link status change This fixes the following error caused by a race condition between phydev->adjust_link() and a MDIO transaction in the phy interrupt handler. The issue was reproduced with the ethernet FEC driver and a micrel KSZ9031 phy. [ 146.195696] fec 2188000.ethernet eth0: MDIO read timeout [ 146.201779] ------------[ cut here ]------------ [ 146.206671] WARNING: CPU: 0 PID: 571 at drivers/net/phy/phy.c:942 phy_error+0x24/0x6c [ 146.214744] Modules linked in: bnep imx_vdoa imx_sdma evbug [ 146.220640] CPU: 0 PID: 571 Comm: irq/128-2188000 Not tainted 5.18.0-rc3-00080-gd569e86915b7 #9 [ 146.229563] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 146.236257] unwind_backtrace from show_stack+0x10/0x14 [ 146.241640] show_stack from dump_stack_lvl+0x58/0x70 [ 146.246841] dump_stack_lvl from __warn+0xb4/0x24c [ 146.251772] __warn from warn_slowpath_fmt+0x5c/0xd4 [ 146.256873] warn_slowpath_fmt from phy_error+0x24/0x6c [ 146.262249] phy_error from kszphy_handle_interrupt+0x40/0x48 [ 146.268159] kszphy_handle_interrupt from irq_thread_fn+0x1c/0x78 [ 146.274417] irq_thread_fn from irq_thread+0xf0/0x1dc [ 146.279605] irq_thread from kthread+0xe4/0x104 [ 146.284267] kthread from ret_from_fork+0x14/0x28 [ 146.289164] Exception stack(0xe6fa1fb0 to 0xe6fa1ff8) [ 146.294448] 1fa0: 00000000 00000000 00000000 00000000 [ 146.302842] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 146.311281] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 146.318262] irq event stamp: 12325 [ 146.321780] hardirqs last enabled at (12333): [] __up_console_sem+0x50/0x60 [ 146.330013] hardirqs last disabled at (12342): [] __up_console_sem+0x3c/0x60 [ 146.338259] softirqs last enabled at (12324): [] __do_softirq+0x2c0/0x624 [ 146.346311] softirqs last disabled at (12319): [] __irq_exit_rcu+0x138/0x178 [ 146.354447] ---[ end trace 0000000000000000 ]--- With the FEC driver phydev->adjust_link() calls fec_enet_adjust_link() calls fec_stop()/fec_restart() and both these function reset and temporary disable the FEC disrupting any MII transaction that could be happening at the same time. fec_enet_adjust_link() and phy_read() can be running at the same time when we have one additional interrupt before the phy_state_machine() is able to terminate. Thread 1 (phylib WQ) | Thread 2 (phy interrupt) | | phy_interrupt() <-- PHY IRQ | handle_interrupt() | phy_read() | phy_trigger_machine() | --> schedule phylib WQ | | phy_state_machine() | phy_check_link_status() | phy_link_change() | phydev->adjust_link() | fec_enet_adjust_link() | --> FEC reset | phy_interrupt() <-- PHY IRQ | phy_read() | Fix this by acquiring the phydev lock in phy_interrupt(). Link: https://lore.kernel.org/all/20220422152612.GA510015@francesco-nb.int.toradex.com/ Fixes: c974bdbc3e77 ("net: phy: Use threaded IRQ, to allow IRQ from sleeping devices") cc: Signed-off-by: Francesco Dolcini Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220506060815.327382-1-francesco.dolcini@toradex.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index beb2b66da1324..f122026c46826 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -970,8 +970,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; struct phy_driver *drv = phydev->drv; + irqreturn_t ret; - return drv->handle_interrupt(phydev); + mutex_lock(&phydev->lock); + ret = drv->handle_interrupt(phydev); + mutex_unlock(&phydev->lock); + + return ret; } /** From 630fd4822af2374cd75c682b7665dcb367613765 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 7 May 2022 16:45:50 +0300 Subject: [PATCH 313/491] net: dsa: flush switchdev workqueue on bridge join error path There is a race between switchdev_bridge_port_offload() and the dsa_port_switchdev_sync_attrs() call right below it. When switchdev_bridge_port_offload() finishes, FDB entries have been replayed by the bridge, but are scheduled for deferred execution later. However dsa_port_switchdev_sync_attrs -> dsa_port_can_apply_vlan_filtering() may impose restrictions on the vlan_filtering attribute and refuse offloading. When this happens, the delayed FDB entries will dereference dp->bridge, which is a NULL pointer because we have stopped the process of offloading this bridge. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Workqueue: dsa_ordered dsa_slave_switchdev_event_work pc : dsa_port_bridge_host_fdb_del+0x64/0x100 lr : dsa_slave_switchdev_event_work+0x130/0x1bc Call trace: dsa_port_bridge_host_fdb_del+0x64/0x100 dsa_slave_switchdev_event_work+0x130/0x1bc process_one_work+0x294/0x670 worker_thread+0x80/0x460 ---[ end trace 0000000000000000 ]--- Error: dsa_core: Must first remove VLAN uppers having VIDs also present in bridge. Fix the bug by doing what we do on the normal bridge leave path as well, which is to wait until the deferred FDB entries complete executing, then exit. The placement of dsa_flush_workqueue() after switchdev_bridge_port_unoffload() guarantees that both the FDB additions and deletions on rollback are waited for. Fixes: d7d0d423dbaa ("net: dsa: flush switchdev workqueue when leaving the bridge") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220507134550.1849834-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/port.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/port.c b/net/dsa/port.c index cdc56ba11f52b..bdccb613285db 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -451,6 +451,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, switchdev_bridge_port_unoffload(brport_dev, dp, &dsa_slave_switchdev_notifier, &dsa_slave_switchdev_blocking_notifier); + dsa_flush_workqueue(); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); out_rollback: From 620239d9a32e9fe27c9204ec11e40058671aeeb6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 25 Apr 2022 15:54:27 -0400 Subject: [PATCH 314/491] ceph: fix setting of xattrs on async created inodes Currently when we create a file, we spin up an xattr buffer to send along with the create request. If we end up doing an async create however, then we currently pass down a zero-length xattr buffer. Fix the code to send down the xattr buffer in req->r_pagelist. If the xattrs span more than a page, however give up and don't try to do an async create. Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2063929 Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Reported-by: John Fortin Reported-by: Sri Ramanujam Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6c9e837aa1d3d..8c8226c0feacc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, iinfo.change_attr = 1; ceph_encode_timespec64(&iinfo.btime, &now); - iinfo.xattr_len = ARRAY_SIZE(xattr_buf); - iinfo.xattr_data = xattr_buf; - memset(iinfo.xattr_data, 0, iinfo.xattr_len); + if (req->r_pagelist) { + iinfo.xattr_len = req->r_pagelist->length; + iinfo.xattr_data = req->r_pagelist->mapped_tail; + } else { + /* fake it */ + iinfo.xattr_len = ARRAY_SIZE(xattr_buf); + iinfo.xattr_data = xattr_buf; + memset(iinfo.xattr_data, 0, iinfo.xattr_len); + } in.ino = cpu_to_le64(vino.ino); in.snapid = cpu_to_le64(CEPH_NOSNAP); @@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out_ctx; + /* Async create can't handle more than a page of xattrs */ + if (as_ctx.pagelist && + !list_is_singular(&as_ctx.pagelist->head)) + try_async = false; } else if (!d_in_lookup(dentry)) { /* If it's not being looked up, it's negative */ return -ENOENT; From 642d51fb0775a41dd6bb3d99b5a40c24df131c20 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 May 2022 18:53:09 +0800 Subject: [PATCH 315/491] ceph: check folio PG_private bit instead of folio->private The pages in the file mapping maybe reclaimed and reused by other subsystems and the page->private maybe used as flags field or something else, if later that pages are used by page caches again the page->private maybe not cleared as expected. Here will check the PG_private bit instead of the folio->private. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/55421 Signed-off-by: Xiubo Li Reviewed-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index aa25bffd48237..b6edcf89a429f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) if (folio_test_dirty(folio)) { dout("%p dirty_folio %p idx %lu -- already dirty\n", mapping->host, folio, folio->index); - BUG_ON(!folio_get_private(folio)); + VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } @@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) * Reference snap context in folio->private. Also set * PagePrivate so that we get invalidate_folio callback. */ - BUG_ON(folio_get_private(folio)); + VM_BUG_ON_FOLIO(folio_test_private(folio), folio); folio_attach_private(folio, snapc); return ceph_fscache_dirty_folio(mapping, folio); @@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, } WARN_ON(!folio_test_locked(folio)); - if (folio_get_private(folio)) { + if (folio_test_private(folio)) { dout("%p invalidate_folio idx %lu full dirty page\n", inode, folio->index); @@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req) /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { - if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) + if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { + pr_warn("%s incorrect op %d req %p index %d tid %llu\n", + __func__, req->r_ops[i].op, req, i, req->r_tid); break; + } osd_data = osd_req_op_extent_osd_data(req, i); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); From 1809c30b6e5a83a1de1435fe01aaa4de4d626a7c Mon Sep 17 00:00:00 2001 From: Manuel Ullmann Date: Wed, 4 May 2022 21:30:44 +0200 Subject: [PATCH 316/491] net: atlantic: always deep reset on pm op, fixing up my null deref regression The impact of this regression is the same for resume that I saw on thaw: the kernel hangs and nothing except SysRq rebooting can be done. Fixes regression in commit cbe6c3a8f8f4 ("net: atlantic: invert deep par in pm functions, preventing null derefs"), where I disabled deep pm resets in suspend and resume, trying to make sense of the atl_resume_common() deep parameter in the first place. It turns out, that atlantic always has to deep reset on pm operations. Even though I expected that and tested resume, I screwed up by kexec-rebooting into an unpatched kernel, thus missing the breakage. This fixup obsoletes the deep parameter of atl_resume_common, but I leave the cleanup for the maintainers to post to mainline. Suspend and hibernation were successfully tested by the reporters. Fixes: cbe6c3a8f8f4 ("net: atlantic: invert deep par in pm functions, preventing null derefs") Link: https://lore.kernel.org/regressions/9-Ehc_xXSwdXcvZqKD5aSqsqeNj5Izco4MYEwnx5cySXVEc9-x_WC4C3kAoCqNTi-H38frroUK17iobNVnkLtW36V6VWGSQEOHXhmVMm5iQ=@protonmail.com/ Reported-by: Jordan Leppert Reported-by: Holger Hoffstaette Tested-by: Jordan Leppert Tested-by: Holger Hoffstaette CC: # 5.10+ Signed-off-by: Manuel Ullmann Link: https://lore.kernel.org/r/87bkw8dfmp.fsf@posteo.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 3a529ee8c8340..831833911a525 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -449,7 +449,7 @@ static int aq_pm_freeze(struct device *dev) static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, false); + return aq_suspend_common(dev, true); } static int aq_pm_thaw(struct device *dev) @@ -459,7 +459,7 @@ static int aq_pm_thaw(struct device *dev) static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, false); + return atl_resume_common(dev, true); } static const struct dev_pm_ops aq_pm_ops = { From ef3a6b70507a2add2cd2e01f5eb9b54d561bacb9 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 10 May 2022 01:19:57 +0530 Subject: [PATCH 317/491] dma-buf: call dma_buf_stats_setup after dmabuf is in valid list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dma_buf_stats_setup() fails, it closes the dmabuf file which results into the calling of dma_buf_file_release() where it does list_del(&dmabuf->list_node) with out first adding it to the proper list. This is resulting into panic in the below path: __list_del_entry_valid+0x38/0xac dma_buf_file_release+0x74/0x158 __fput+0xf4/0x428 ____fput+0x14/0x24 task_work_run+0x178/0x24c do_notify_resume+0x194/0x264 work_pending+0xc/0x5f0 Fix it by moving the dma_buf_stats_setup() after dmabuf is added to the list. Fixes: bdb8d06dfefd ("dmabuf: Add the capability to expose DMA-BUF stats in sysfs") Signed-off-by: Charan Teja Reddy Tested-by: T.J. Mercier Acked-by: T.J. Mercier Cc: # 5.15.x+ Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1652125797-2043-1-git-send-email-quic_charante@quicinc.com --- drivers/dma-buf/dma-buf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index df23239b04fc2..b1e25ae983027 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -543,10 +543,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_mode |= FMODE_LSEEK; dmabuf->file = file; - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); @@ -554,6 +550,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); + ret = dma_buf_stats_setup(dmabuf); + if (ret) + goto err_sysfs; + return dmabuf; err_sysfs: From 846a3351ddfe4a86eede4bb26a205c3f38ef84d3 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Tue, 10 May 2022 10:35:14 +0800 Subject: [PATCH 318/491] writeback: Avoid skipping inode writeback We have run into an issue that a task gets stuck in balance_dirty_pages_ratelimited() when perform I/O stress testing. The reason we observed is that an I_DIRTY_PAGES inode with lots of dirty pages is in b_dirty_time list and standard background writeback cannot writeback the inode. After studing the relevant code, the following scenario may lead to the issue: task1 task2 ----- ----- fuse_flush write_inode_now //in b_dirty_time writeback_single_inode __writeback_single_inode fuse_write_end filemap_dirty_folio __xa_set_mark:PAGECACHE_TAG_DIRTY lock inode->i_lock if mapping tagged PAGECACHE_TAG_DIRTY inode->i_state |= I_DIRTY_PAGES unlock inode->i_lock __mark_inode_dirty:I_DIRTY_PAGES lock inode->i_lock -was dirty,inode stays in -b_dirty_time unlock inode->i_lock if(!(inode->i_state & I_DIRTY_All)) -not true,so nothing done This patch moves the dirty inode to b_dirty list when the inode currently is not queued in b_io or b_more_io list at the end of writeback_single_inode. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") Signed-off-by: Jing Xia Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220510023514.27399-1-jing.xia@unisoc.com --- fs/fs-writeback.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 591fe9cf16593..1fae0196292a1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,6 +1712,10 @@ static int writeback_single_inode(struct inode *inode, */ if (!(inode->i_state & I_DIRTY_ALL)) inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED) && + (inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: From dc5306a8c0eace6c113aded2e36ae5e15fdca4d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 8 May 2022 03:22:17 -0700 Subject: [PATCH 319/491] decnet: Use container_of() for struct dn_neigh casts Clang's structure layout randomization feature gets upset when it sees struct neighbor (which is randomized) cast to struct dn_neigh: net/decnet/dn_route.c:1123:15: error: casting from randomized structure pointer type 'struct neighbour *' to 'struct dn_neigh *' gateway = ((struct dn_neigh *)neigh)->addr; ^ Update all the open-coded casts to use container_of() to do the conversion instead of depending on strict member ordering. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202205041247.WKBEHGS5-lkp@intel.com Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Yajun Deng Cc: Zheng Yongjun Cc: Bill Wendling Cc: linux-decnet-user@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220508102217.2647184-1-keescook@chromium.org Signed-off-by: Paolo Abeni --- net/decnet/dn_dev.c | 4 ++-- net/decnet/dn_neigh.c | 3 ++- net/decnet/dn_route.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0ee7d4c0c9554..a09ba642b5e76 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->neighbor, dn_hiord, ETH_ALEN); if (dn_db->router) { - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); dn_dn2eth(msg->neighbor, dn->addr); } @@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); struct sk_buff *skb; size_t size; unsigned char *ptr; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 94b306f6d5511..fbd98ac853ea0 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { - if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) + if (msg->priority > container_of(dn_db->router, + struct dn_neigh, n)->priority) neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); } } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 7e85f2a1ae254..d1d78a463a06b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1120,7 +1120,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o /* Ok then, we assume its directly connected and move on */ select_source: if (neigh) - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; if (gateway == 0) gateway = fld.daddr; if (fld.saddr == 0) { @@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb) /* Use the default router if there is one */ neigh = neigh_clone(dn_db->router); if (neigh) { - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; goto make_route; } From 205f3991a273cac6008ef4db3d1c0dc54d14fb56 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 10 May 2022 11:27:21 +0100 Subject: [PATCH 320/491] arm64: vdso: fix makefile dependency on vdso.so There is currently no dependency for vdso*-wrap.S on vdso*.so, which means that you can get a build that uses a stale vdso*-wrap.o. In commit a5b8ca97fbf8, the file that includes the vdso.so was moved and renamed from arch/arm64/kernel/vdso/vdso.S to arch/arm64/kernel/vdso-wrap.S, when this happened the Makefile was not updated to force the dependcy on vdso.so. Fixes: a5b8ca97fbf8 ("arm64: do not descend to vdso directories twice") Signed-off-by: Joey Gouly Cc: Masahiro Yamada Cc: Vincenzo Frascino Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220510102721.50811-1-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 4 ++++ arch/arm64/kernel/vdso/Makefile | 3 --- arch/arm64/kernel/vdso32/Makefile | 3 --- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 986837d7ec82d..fa7981d0d9170 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -75,6 +75,10 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o +# Force dependency (vdso*-wrap.S includes vdso.so through incbin) +$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so +$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so + obj-y += probes/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 172452f79e462..ac1964ebed1ef 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -52,9 +52,6 @@ GCOV_PROFILE := n targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -# Force dependency (incbin is bad) -$(obj)/vdso.o : $(obj)/vdso.so - # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold_and_vdso_check) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index ed181bedbffc5..05ba1aae1b6f2 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -131,9 +131,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -# Force dependency (vdso.s includes vdso.so through incbin) -$(obj)/vdso.o: $(obj)/vdso.so - include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) From 7ff960a6fe399fdcbca6159063684671ae57eee9 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Tue, 10 May 2022 19:27:23 +0900 Subject: [PATCH 321/491] virtio: fix virtio transitional ids This commit fixes the transitional PCI device ID. Fixes: d61914ea6ada ("virtio: update virtio id table, add transitional ids") Signed-off-by: Shunsuke Mie Link: https://lore.kernel.org/r/20220510102723.87666-1-mie@igel.co.jp Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 80d76b75bccd9..7aa2eb7662050 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -73,12 +73,12 @@ * Virtio Transitional IDs */ -#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ -#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ -#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ -#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ -#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ -#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ -#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ +#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */ #endif /* _LINUX_VIRTIO_IDS_H */ From c1ad35dd0548ce947d97aaf92f7f2f9a202951cf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 May 2022 12:36:04 +0200 Subject: [PATCH 322/491] udf: Avoid using stale lengthOfImpUse udf_write_fi() uses lengthOfImpUse of the entry it is writing to. However this field has not yet been initialized so it either contains completely bogus value or value from last directory entry at that place. In either case this is wrong and can lead to filesystem corruption or kernel crashes. Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 979a6e28dd96 ("udf: Get rid of 0-length arrays in struct fileIdentDesc") Signed-off-by: Jan Kara --- fs/udf/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0ed4861b038f6..b3d5f97f16cdb 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -75,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, if (fileident) { if (adinicb || (offset + lfi < 0)) { - memcpy(udf_get_fi_ident(sfi), fileident, lfi); + memcpy(sfi->impUse + liu, fileident, lfi); } else if (offset >= 0) { memcpy(fibh->ebh->b_data + offset, fileident, lfi); } else { - memcpy(udf_get_fi_ident(sfi), fileident, -offset); + memcpy(sfi->impUse + liu, fileident, -offset); memcpy(fibh->ebh->b_data, fileident - offset, lfi + offset); } @@ -88,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, offset += lfi; if (adinicb || (offset + padlen < 0)) { - memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen); + memset(sfi->impUse + liu + lfi, 0x00, padlen); } else if (offset >= 0) { memset(fibh->ebh->b_data + offset, 0x00, padlen); } else { - memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset); + memset(sfi->impUse + liu + lfi, 0x00, -offset); memset(fibh->ebh->b_data, 0x00, padlen + offset); } From a34ae6c0660d3b96b0055f68ef74dc9478852245 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 10 May 2022 12:36:26 +0200 Subject: [PATCH 323/491] ALSA: wavefront: Proper check of get_user() error The antient ISA wavefront driver reads its sample patch data (uploaded over an ioctl) via __get_user() with no good reason; likely just for some performance optimizations in the past. Let's change this to the standard get_user() and the error check for handling the fault case properly. Reported-by: Linus Torvalds Cc: Link: https://lore.kernel.org/r/20220510103626.16635-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/wavefront/wavefront_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 69cbc79fbb716..2aaaa68071744 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev, if (dataptr < data_end) { - __get_user (sample_short, dataptr); + if (get_user(sample_short, dataptr)) + return -EFAULT; dataptr += skip; if (data_is_unsigned) { /* GUS ? */ From c46721e4604f260918e660550a16d1e28637d66c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 9 May 2022 10:35:10 -0600 Subject: [PATCH 324/491] MAINTAINERS: Add James and Mike as Arm64 performance events reviewers James, Mike and Leo have been doing all the reviews and development work for the Coresight perf tools for a couple of years now. As such remove my name and add James and Mike as official reviewers (Leo is already listed as such). Signed-off-by: Mathieu Poirier Cc: James Clark Cc: Leo Yan Cc: Mike Leach Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e8c52d0192a61..2da9d14dde851 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15475,7 +15475,8 @@ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 R: John Garry R: Will Deacon -R: Mathieu Poirier +R: James Clark +R: Mike Leach R: Leo Yan L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported From 575f00edea0a7117e6a4337800ebf62e2a1d09d6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Mar 2022 15:16:05 +0000 Subject: [PATCH 325/491] Documentation/process: Update ARM contact for embargoed hardware issues With Grant taking a prominent role in Linaro, I will take over as the process ambassador for ARM w.r.t. embargoed hardware issues. Signed-off-by: Catalin Marinas Cc: Grant Likely Cc: Jonathan Corbet Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 98d7bc868f2a3..95999302d279f 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -246,7 +246,7 @@ an involved disclosed party. The current ambassadors list: ============= ======================================================== AMD Tom Lendacky Ampere Darren Hart - ARM Grant Likely + ARM Catalin Marinas IBM Power Anton Blanchard IBM Z Christian Borntraeger Intel Tony Luck From 12a4d677b1c34717443470c1492fe520638ef39a Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 9 May 2022 22:45:19 +0800 Subject: [PATCH 326/491] net: phy: micrel: Fix incorrect variable type in micrel In lanphy_read_page_reg, calling __phy_read() might return a negative error code. Use 'int' to check the error code. Fixes: 7c2dcfa295b1 ("net: phy: micrel: Add support for LAN8804 PHY") Signed-off-by: Wan Jiabing Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220509144519.2343399-1-wanjiabing@vivo.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9d7dafed39318..cd9aa353b653f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1743,7 +1743,7 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) { - u32 data; + int data; phy_lock_mdio_bus(phydev); __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); @@ -2444,8 +2444,7 @@ static int lan8804_config_init(struct phy_device *phydev) static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) { - u16 tsu_irq_status; - int irq_status; + int irq_status, tsu_irq_status; irq_status = phy_read(phydev, LAN8814_INTS); if (irq_status > 0 && (irq_status & LAN8814_INT_LINK)) From 792ea6a074ae7ea5ab6f1b8b31f76bb0297de66c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 May 2022 09:56:05 +0200 Subject: [PATCH 327/491] genirq: Remove WARN_ON_ONCE() in generic_handle_domain_irq() Since commit 0953fb263714 ("irq: remove handle_domain_{irq,nmi}()"), generic_handle_domain_irq() warns if called outside hardirq context, even though the function calls down to handle_irq_desc(), which warns about the same, but conditionally on handle_enforce_irqctx(). The newly added warning is a false positive if the interrupt originates from any other irqchip than x86 APIC or ARM GIC/GICv3. Those are the only ones for which handle_enforce_irqctx() returns true. Per commit c16816acd086 ("genirq: Add protection against unsafe usage of generic_handle_irq()"): "In general calling generic_handle_irq() with interrupts disabled from non interrupt context is harmless. For some interrupt controllers like the x86 trainwrecks this is outright dangerous as it might corrupt state if an interrupt affinity change is pending." Examples for interrupt chips where the warning is a false positive are USB-attached GPIO controllers such as drivers/gpio/gpio-dln2.c: USB gadgets are incapable of directly signaling an interrupt because they cannot initiate a bus transaction by themselves. All communication on the bus is initiated by the host controller, which polls a gadget's Interrupt Endpoint in regular intervals. If an interrupt is pending, that information is passed up the stack in softirq context, from which a hardirq is synthesized via generic_handle_domain_irq(). Remove the warning to eliminate such false positives. Fixes: 0953fb263714 ("irq: remove handle_domain_{irq,nmi}()") Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Mark Rutland Cc: Jakub Kicinski CC: Linus Walleij Cc: Bartosz Golaszewski Cc: Octavian Purdila Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220505113207.487861b2@kernel.org Link: https://lore.kernel.org/r/20220506203242.GA1855@wunner.de Link: https://lore.kernel.org/r/c3caf60bfa78e5fdbdf483096b7174da65d1813a.1652168866.git.lukas@wunner.de --- kernel/irq/irqdesc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 0099b87dd8530..d323b180b0f37 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -701,7 +701,6 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe); */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_hardirq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); From cc28fac16ab7152168be8bee76708df9d65efd71 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 6 May 2022 13:55:28 -0700 Subject: [PATCH 328/491] scsi: lpfc: Fix split code for FLOGI on FCoE The refactoring code converted context information from SLI-3 to SLI-4. The conversion for the SLI-4 bit field tried to use the old (hacky) SLI3 high/low bit settings. Needless to say, it was incorrect. Explicitly set the context field to type FCFI and set it in the wqe. SLI-4 is now a proper bit field so no need for the shifting/anding. Link: https://lore.kernel.org/r/20220506205528.61590-1-jsmart2021@gmail.com Fixes: 6831ce129f19 ("scsi: lpfc: SLI path split: Refactor base ELS paths and the FLOGI path") Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index ef6e8cd8c26ae..872a26376ccbb 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1330,7 +1330,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_0) { /* FLOGI needs to be 3 for WQE FCFI */ - ct = ((SLI4_CT_FCFI >> 1) & 1) | (SLI4_CT_FCFI & 1); + ct = SLI4_CT_FCFI; bf_set(wqe_ct, &wqe->els_req.wqe_com, ct); /* Set the fcfi to the fcfi we registered with */ From 7752662071053adcdb6b6e7853834205dd60e1c0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 6 May 2022 13:55:48 -0700 Subject: [PATCH 329/491] scsi: lpfc: Correct BDE DMA address assignment for GEN_REQ_WQE Garbage FCoE CT frames are transmitted on the wire because of bad DMA ptr addresses filled in the GEN_REQ_WQE. The __lpfc_sli_prep_gen_req_s4() routine is using the wrong buffer for the payload address. Change the DMA buffer assignment from the bmp buffer to the bpl buffer. Link: https://lore.kernel.org/r/20220506205548.61644-1-jsmart2021@gmail.com Fixes: 61910d6a5243 ("scsi: lpfc: SLI path split: Refactor CT paths") Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index bda2a7ba4e77f..6adaf79e67cc0 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -10720,10 +10720,10 @@ __lpfc_sli_prep_gen_req_s4(struct lpfc_iocbq *cmdiocbq, struct lpfc_dmabuf *bmp, /* Words 0 - 2 */ bde = (struct ulp_bde64_le *)&cmdwqe->generic.bde; - bde->addr_low = cpu_to_le32(putPaddrLow(bmp->phys)); - bde->addr_high = cpu_to_le32(putPaddrHigh(bmp->phys)); + bde->addr_low = bpl->addr_low; + bde->addr_high = bpl->addr_high; bde->type_size = cpu_to_le32(xmit_len); - bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BLP_64); + bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BDE_64); /* Word 3 */ cmdwqe->gen_req.request_payload_len = xmit_len; From 0807ce0b010418a191e0e4009803b2d74c3245d5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 May 2022 11:13:16 +0800 Subject: [PATCH 330/491] net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe() Switch to using pcim_enable_device() to avoid missing pci_disable_device(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220510031316.1780409-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index fcf17d8a0494b..644bb54f5f020 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) From 309d7363ca3d9fcdb92ff2d958be14d7e8707f68 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Tue, 10 May 2022 20:07:59 -0500 Subject: [PATCH 331/491] ALSA: hda/realtek: Add quirk for the Framework Laptop Some board revisions of the Framework Laptop have an ALC295 with a disconnected or faulty headset mic presence detect. The "dell-headset-multi" fixup addresses this issue, but also enables an inoperative "Headphone Mic" input device whenever a headset is connected. Adding a new quirk chain specific to the Framework Laptop resolves this issue. The one introduced here is based on the System76 "no headphone mic" quirk chain. The VID:PID f111:0001 have been allocated to Framework Computer for this board revision. Revision history: - v2: Moved to a custom quirk chain to suppress the "Headphone Mic" pincfg. Signed-off-by: Dustin L. Howett Link: https://lore.kernel.org/r/20220511010759.3554-1-dustin@howett.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 37ad7b6780a23..33c439578a610 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7046,6 +7046,7 @@ enum { ALC245_FIXUP_CS35L41_SPI_4, ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED, ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, + ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -8848,6 +8849,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, }, + [ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x02a1112c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9338,6 +9348,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), + SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. From 62e0ae0f4020250f961cf8d0103a4621be74e077 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:23 -0700 Subject: [PATCH 332/491] net: atlantic: fix "frag[0] not initialized" In aq_ring_rx_clean(), if buff->is_eop is not set AND buff->len < AQ_CFG_RX_HDR_SIZE, then hdr_len remains equal to buff->len and skb_add_rx_frag(xxx, *0*, ...) is not called. The loop following this code starts calling skb_add_rx_frag() starting with i=1 and thus frag[0] is never initialized. Since i is initialized to zero at the top of the primary loop, we can just reference and post-increment i instead of hardcoding the 0 when calling skb_add_rx_frag() the first time. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 77e76c9efd32f..440423b0e8eab 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -446,7 +446,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, ALIGN(hdr_len, sizeof(long))); if (buff->len - hdr_len > 0) { - skb_add_rx_frag(skb, 0, buff->rxdata.page, + skb_add_rx_frag(skb, i++, buff->rxdata.page, buff->rxdata.pg_off + hdr_len, buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX); @@ -455,7 +455,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; - i = 1U; do { next_ = buff_->next; buff_ = &self->buff_ring[next_]; From 79784d77ebbd3ec516b7a5ce555d979fb7946202 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:24 -0700 Subject: [PATCH 333/491] net: atlantic: reduce scope of is_rsc_complete Don't defer handling the err case outside the loop. That's pointless. And since is_rsc_complete is only used inside this loop, declare it inside the loop to reduce it's scope. Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 440423b0e8eab..bc1952131799e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -346,7 +346,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget) { struct net_device *ndev = aq_nic_get_ndev(self->aq_nic); - bool is_rsc_completed = true; int err = 0; for (; (self->sw_head != self->hw_head) && budget; @@ -366,6 +365,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + bool is_rsc_completed = true; + if (buff_->next >= self->size) { err = -EIO; goto err_exit; @@ -377,18 +378,16 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) - break; + if (unlikely(!is_rsc_completed)) { + err = 0; + goto err_exit; + } buff->is_error |= buff_->is_error; buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); - if (!is_rsc_completed) { - err = 0; - goto err_exit; - } if (buff->is_error || (buff->is_lro && buff->is_cso_err)) { buff_ = buff; From 6aecbba12b5c90b26dc062af3b9de8c4b3a2f19f Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:25 -0700 Subject: [PATCH 334/491] net: atlantic: add check for MAX_SKB_FRAGS Enforce that the CPU can not get stuck in an infinite loop. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index bc1952131799e..8201ce7adb777 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -363,6 +363,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, continue; if (!buff->is_eop) { + unsigned int frag_cnt = 0U; buff_ = buff; do { bool is_rsc_completed = true; @@ -371,6 +372,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = -EIO; goto err_exit; } + + frag_cnt++; next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -378,7 +381,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) { + if (unlikely(!is_rsc_completed) || + frag_cnt > MAX_SKB_FRAGS) { err = 0; goto err_exit; } From 2120b7f4d128433ad8c5f503a9584deba0684901 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:26 -0700 Subject: [PATCH 335/491] net: atlantic: verify hw_head_ lies within TX buffer ring Bounds check hw_head index provided by NIC to verify it lies within the TX buffer ring. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index d875ce3ec759b..15ede7285fb5d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); From 2c50c6867c85afee6f2b3bcbc50fc9d0083d1343 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:06 +0200 Subject: [PATCH 336/491] s390/ctcm: fix variable dereferenced before check Found by cppcheck and smatch. smatch complains about drivers/s390/net/ctcm_sysfs.c:43 ctcm_buffer_write() warn: variable dereferenced before check 'priv' (see line 42) Fixes: 3c09e2647b5e ("ctcm: rename READ/WRITE defines to avoid redefinitions") Reported-by: Colin Ian King Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c index ded1930a00b2d..e3813a7aa5e68 100644 --- a/drivers/s390/net/ctcm_sysfs.c +++ b/drivers/s390/net/ctcm_sysfs.c @@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev, struct ctcm_priv *priv = dev_get_drvdata(dev); int rc; - ndev = priv->channel[CTCM_READ]->netdev; - if (!(priv && priv->channel[CTCM_READ] && ndev)) { + if (!(priv && priv->channel[CTCM_READ] && + priv->channel[CTCM_READ]->netdev)) { CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev"); return -ENODEV; } + ndev = priv->channel[CTCM_READ]->netdev; rc = kstrtouint(buf, 0, &bs1); if (rc) From 0c0b20587b9f25a2ad14db7f80ebe49bdf29920a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:07 +0200 Subject: [PATCH 337/491] s390/ctcm: fix potential memory leak smatch complains about drivers/s390/net/ctcm_mpc.c:1210 ctcmpc_unpack_skb() warn: possible memory leak of 'mpcginfo' mpc_action_discontact() did not free mpcginfo. Consolidate the freeing in ctcmpc_unpack_skb(). Fixes: 293d984f0e36 ("ctcm: infrastructure for replaced ctc driver") Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_mpc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index 88abfb5e8045c..8ac213a551418 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo) ctcm_clear_busy_do(dev); } - kfree(mpcginfo); - return; } @@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb) CTCM_FUNTAIL, dev->name); priv->stats.rx_dropped++; /* mpcginfo only used for non-data transfers */ - kfree(mpcginfo); if (do_debug_data) ctcmpc_dump_skb(pskb, -8); } + kfree(mpcginfo); } done: @@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg) } break; } - kfree(mpcginfo); CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n", __func__, ch->id, grp->outstanding_xid2, @@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg) mpc_validate_xid(mpcginfo); break; } - kfree(mpcginfo); return; } From 671bb35c8e746439f0ed70815968f9a4f20a8deb Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:08 +0200 Subject: [PATCH 338/491] s390/lcs: fix variable dereferenced before check smatch complains about drivers/s390/net/lcs.c:1741 lcs_get_control() warn: variable dereferenced before check 'card->dev' (see line 1739) Fixes: 27eb5ac8f015 ("[PATCH] s390: lcs driver bug fixes and improvements [1/2]") Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/lcs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index bab9b34926c68..84c8981317b46 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - if (card->dev) + if (card->dev) { + pr_warn("Stoplan for %s initiated by LGW\n", + card->dev->name); netif_carrier_off(card->dev); + } break; default: LCS_DBF_TEXT(5, trace, "noLGWcmd"); From ee8348496c77e3737d0a6cda307a521f2cff954f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 10 May 2022 14:37:17 +0200 Subject: [PATCH 339/491] KVM: PPC: Book3S PR: Enable MSR_DR for switch_mmu_context() Commit 863771a28e27 ("powerpc/32s: Convert switch_mmu_context() to C") moved the switch_mmu_context() to C. While in principle a good idea, it meant that the function now uses the stack. The stack is not accessible from real mode though. So to keep calling the function, let's turn on MSR_DR while we call it. That way, all pointer references to the stack are handled virtually. In addition, make sure to save/restore r12 on the stack, as it may get clobbered by the C function. Fixes: 863771a28e27 ("powerpc/32s: Convert switch_mmu_context() to C") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Matt Evans Signed-off-by: Alexander Graf Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220510123717.24508-1-graf@amazon.com --- arch/powerpc/kvm/book3s_32_sr.S | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S index e3ab9df6cf199..6cfcd20d46686 100644 --- a/arch/powerpc/kvm/book3s_32_sr.S +++ b/arch/powerpc/kvm/book3s_32_sr.S @@ -122,11 +122,27 @@ /* 0x0 - 0xb */ - /* 'current->mm' needs to be in r4 */ - tophys(r4, r2) - lwz r4, MM(r4) - tophys(r4, r4) - /* This only clobbers r0, r3, r4 and r5 */ + /* switch_mmu_context() needs paging, let's enable it */ + mfmsr r9 + ori r11, r9, MSR_DR + mtmsr r11 + sync + + /* switch_mmu_context() clobbers r12, rescue it */ + SAVE_GPR(12, r1) + + /* Calling switch_mmu_context(, current->mm, ); */ + lwz r4, MM(r2) bl switch_mmu_context + /* restore r12 */ + REST_GPR(12, r1) + + /* Disable paging again */ + mfmsr r9 + li r6, MSR_DR + andc r9, r9, r6 + mtmsr r9 + sync + .endm From a56f445f807b0276fc0660c330bf93a9ea78e8ea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 May 2022 09:37:06 -0400 Subject: [PATCH 340/491] Revert "drm/amd/pm: keep the BACO feature enabled for suspend" This reverts commit eaa090538e8d21801c6d5f94590c3799e6a528b5. Commit ebc002e3ee78 ("drm/amdgpu: don't use BACO for reset in S3") stops using BACO for reset during suspend, so it's no longer necessary to leave BACO enabled during suspend. This fixes resume from suspend on the navy flounder dGPU in the ASUS ROG Strix G513QY. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2008 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1982 Reviewed-by: Lijo Lazar Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f1544755d8b46..f10a0256413e6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1351,14 +1351,8 @@ static int smu_disable_dpms(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; int ret = 0; - /* - * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair - * the workaround which always reset the asic in suspend. - * It's likely that workaround will be dropped in the future. - * Then the change here should be dropped together. - */ bool use_baco = !smu->is_apu && - (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) && + ((amdgpu_in_reset(adev) && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev))); From c65b364c52ba352177dde6944f5efaa29bd40b52 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 May 2022 10:32:26 -0400 Subject: [PATCH 341/491] drm/amdgpu/ctx: only reset stable pstate if the user changed it (v2) Check if the requested stable pstate matches the current one before changing it. This avoids changing the stable pstate on context destroy if the user never changed it in the first place via the IOCTL. v2: compare the current and requested rather than setting a flag (Lijo) Fixes: 8cda7a4f96e435 ("drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 8f0e6d93bb9cb..c317078d1afd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -296,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, { struct amdgpu_device *adev = ctx->adev; enum amd_dpm_forced_level level; + u32 current_stable_pstate; int r; mutex_lock(&adev->pm.stable_pstate_ctx_lock); @@ -304,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, goto done; } + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r || (stable_pstate == current_stable_pstate)) + goto done; + switch (stable_pstate) { case AMDGPU_CTX_STABLE_PSTATE_NONE: level = AMD_DPM_FORCED_LEVEL_AUTO; From 7b145802ba545ecf9446ce6d67d6011b73dac0e0 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 10 May 2022 11:22:21 -0700 Subject: [PATCH 342/491] thermal: int340x: Mode setting with new OS handshake With the new OS handshake introduced by commit: "c7ff29763989 ("thermal: int340x: Update OS policy capability handshake")", the "enabled" thermal zone mode doesn't work in the same way as previously. The "enabled" mode fails with -EINVAL when the new handshake is used. To address this issue, when the new OS UUID mask is set: - When the mode is "enabled", return 0 as the firmware already has the latest policy mask. - When the mode is "disabled", update the firmware with the UUID mask of zero. This way, the firmware can take over the thermal control. Also reset the OS UUID mask, which allows user space to update with new set of policies. Fixes: c7ff29763989 ("thermal: int340x: Update OS policy capability handshake") Signed-off-by: Srinivas Pandruvada [ rjw: Changelog edits, removed unneeded parens ] Signed-off-by: Rafael J. Wysocki --- .../intel/int340x_thermal/int3400_thermal.c | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index d97f496bab9be..79931ddc582a5 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -194,12 +194,31 @@ static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enab return result; } +static int set_os_uuid_mask(struct int3400_thermal_priv *priv, u32 mask) +{ + int cap = 0; + + /* + * Capability bits: + * Bit 0: set to 1 to indicate DPTF is active + * Bi1 1: set to 1 to active cooling is supported by user space daemon + * Bit 2: set to 1 to passive cooling is supported by user space daemon + * Bit 3: set to 1 to critical trip is handled by user space daemon + */ + if (mask) + cap = (priv->os_uuid_mask << 1) | 0x01; + + return int3400_thermal_run_osc(priv->adev->handle, + "b23ba85d-c8b7-3542-88de-8de2ffcfd698", + &cap); +} + static ssize_t current_uuid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct int3400_thermal_priv *priv = dev_get_drvdata(dev); - int i; + int ret, i; for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) { if (!strncmp(buf, int3400_thermal_uuids[i], @@ -231,19 +250,7 @@ static ssize_t current_uuid_store(struct device *dev, } if (priv->os_uuid_mask) { - int cap, ret; - - /* - * Capability bits: - * Bit 0: set to 1 to indicate DPTF is active - * Bi1 1: set to 1 to active cooling is supported by user space daemon - * Bit 2: set to 1 to passive cooling is supported by user space daemon - * Bit 3: set to 1 to critical trip is handled by user space daemon - */ - cap = ((priv->os_uuid_mask << 1) | 0x01); - ret = int3400_thermal_run_osc(priv->adev->handle, - "b23ba85d-c8b7-3542-88de-8de2ffcfd698", - &cap); + ret = set_os_uuid_mask(priv, priv->os_uuid_mask); if (ret) return ret; } @@ -469,17 +476,26 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal, if (mode != thermal->mode) { int enabled; + enabled = mode == THERMAL_DEVICE_ENABLED; + + if (priv->os_uuid_mask) { + if (!enabled) { + priv->os_uuid_mask = 0; + result = set_os_uuid_mask(priv, priv->os_uuid_mask); + } + goto eval_odvp; + } + if (priv->current_uuid_index < 0 || priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID) return -EINVAL; - enabled = (mode == THERMAL_DEVICE_ENABLED); result = int3400_thermal_run_osc(priv->adev->handle, int3400_thermal_uuids[priv->current_uuid_index], &enabled); } - +eval_odvp: evaluate_odvp(priv); return result; From 8b796475fd7882663a870456466a4fb315cc1bd6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 10 May 2022 16:57:34 +0200 Subject: [PATCH 343/491] net/sched: act_pedit: really ensure the skb is writable Currently pedit tries to ensure that the accessed skb offset is writable via skb_unclone(). The action potentially allows touching any skb bytes, so it may end-up modifying shared data. The above causes some sporadic MPTCP self-test failures, due to this code: tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ index 100 The above modifies a data byte outside the skb head and the skb is a cloned one, carrying a TCP output packet. This change addresses the issue by keeping track of a rough over-estimate highest skb offset accessed by the action and ensuring such offset is really writable. Note that this may cause performance regressions in some scenarios, but hopefully pedit is not in the critical path. Fixes: db2c24175d14 ("act_pedit: access skb->data safely") Acked-by: Mat Martineau Tested-by: Geliang Tang Signed-off-by: Paolo Abeni Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/1fcf78e6679d0a287dd61bb0f04730ce33b3255d.1652194627.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_pedit.h | 1 + net/sched/act_pedit.c | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7ea..3e02709a1df65 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c1776..0eaaf1f45de17 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index; if (!nla) { @@ -228,6 +228,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + } p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -308,13 +320,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i; - if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock); + max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { @@ -403,6 +420,7 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; } From 3f95a7472d14abef284d8968734fe2ae7ff4845f Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Tue, 10 May 2022 13:48:46 -0700 Subject: [PATCH 344/491] i40e: i40e_main: fix a missing check on list iterator The bug is here: ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); The list iterator 'ch' will point to a bogus position containing HEAD if the list is empty or no element is found. This case must be checked before any use of the iterator, otherwise it will lead to a invalid memory access. To fix this bug, use a new variable 'iter' as the list iterator, while use the origin variable 'ch' as a dedicated pointer to point to the found element. Cc: stable@vger.kernel.org Fixes: 1d8d80b4e4ff6 ("i40e: Add macvlan support on i40e") Signed-off-by: Xiaomeng Tong Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220510204846.2166999-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 27 +++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6778df2177a11..98871f0149946 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7549,42 +7549,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd) { + struct i40e_channel *ch = NULL, *ch_tmp, *iter; int ret = 0, num_tc = 1, i, aq_err; - struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - if (list_empty(&vsi->macvlan_list)) - return -EINVAL; - /* Go through the list and find an available channel */ - list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { - if (!i40e_is_channel_macvlan(ch)) { - ch->fwd = fwd; + list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(iter)) { + iter->fwd = fwd; /* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++) netdev_bind_sb_channel_queue(vsi->netdev, vdev, i, - ch->num_queue_pairs, - ch->base_queue); - for (i = 0; i < ch->num_queue_pairs; i++) { + iter->num_queue_pairs, + iter->base_queue); + for (i = 0; i < iter->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; - pf_q = ch->base_queue + i; + pf_q = iter->base_queue + i; /* Get to TX ring ptr */ tx_ring = vsi->tx_rings[pf_q]; - tx_ring->ch = ch; + tx_ring->ch = iter; /* Get the RX ring ptr */ rx_ring = vsi->rx_rings[pf_q]; - rx_ring->ch = ch; + rx_ring->ch = iter; } + ch = iter; break; } } + if (!ch) + return -EINVAL; + /* Guarantee all rings are updated before we update the * MAC address filter. */ From 9c2136be0878c88c53dea26943ce40bb03ad8d8d Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Wed, 11 May 2022 18:28:36 +0000 Subject: [PATCH 345/491] sched/tracing: Append prev_state to tp args instead Commit fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) added a new prev_state argument to the sched_switch tracepoint, before the prev task_struct pointer. This reordering of arguments broke BPF programs that use the raw tracepoint (e.g. tp_btf programs). The type of the second argument has changed and existing programs that assume a task_struct* argument (e.g. for bpf_task_storage access) will now fail to verify. If we instead append the new argument to the end, all existing programs would continue to work and can conditionally extract the prev_state argument on supported kernel versions. Fixes: fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) Signed-off-by: Delyan Kratunov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (Google) Link: https://lkml.kernel.org/r/c8a6930dfdd58a4a5755fc01732675472979732b.camel@fb.com --- include/trace/events/sched.h | 6 +++--- kernel/sched/core.c | 2 +- kernel/trace/fgraph.c | 4 ++-- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace_events.c | 8 ++++---- kernel/trace/trace_osnoise.c | 4 ++-- kernel/trace/trace_sched_switch.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- samples/trace_events/trace_custom_sched.h | 6 +++--- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 65e7867563214..fbb99a61f714c 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next, + unsigned int prev_state), - TP_ARGS(preempt, prev_state, prev, next), + TP_ARGS(preempt, prev, next, prev_state), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 51efaabac3e43..d58c0389eb23c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next); + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 8f4fb328133ab..a7e84c8543cb7 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -404,9 +404,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) static void ftrace_graph_probe_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { unsigned long long timestamp; int index; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f1d2f5e72634..af899b058c8d0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) static void ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *pid_list; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e11e167b78090..f97de82d1342a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable) static void event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; @@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, static void event_filter_pid_sched_switch_probe_post(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index e9ae1f33a7f03..afb92e2f0aeab 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) */ static void trace_sched_switch_callback(void *data, bool preempt, - unsigned int prev_state, struct task_struct *p, - struct task_struct *n) + struct task_struct *n, + unsigned int prev_state) { struct osnoise_variables *osn_var = this_cpu_osn_var(); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 45796d8bd4b2d..c9ffdcfe622e1 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex); static void probe_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, - struct task_struct *prev, struct task_struct *next) + struct task_struct *prev, struct task_struct *next, + unsigned int prev_state) { int flags; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 46429f9a96faf..330aee1c1a49e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, static void notrace probe_wakeup_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, - struct task_struct *prev, struct task_struct *next) + struct task_struct *prev, struct task_struct *next, + unsigned int prev_state) { struct trace_array_cpu *data; u64 T0, T1, delta; diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h index 9fdd8e7c2a458..951388334a3fa 100644 --- a/samples/trace_events/trace_custom_sched.h +++ b/samples/trace_events/trace_custom_sched.h @@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch, * that the custom event is using. */ TP_PROTO(bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next, + unsigned int prev_state), - TP_ARGS(preempt, prev_state, prev, next), + TP_ARGS(preempt, prev, next, prev_state), /* * The next fields are where the customization happens. From 103a2f3255a95991252f8f13375c3a96a75011cd Mon Sep 17 00:00:00 2001 From: Itay Iellin Date: Sat, 7 May 2022 08:32:48 -0400 Subject: [PATCH 346/491] Bluetooth: Fix the creation of hdev->name Set a size limit of 8 bytes of the written buffer to "hdev->name" including the terminating null byte, as the size of "hdev->name" is 8 bytes. If an id value which is greater than 9999 is allocated, then the "snprintf(hdev->name, sizeof(hdev->name), "hci%d", id)" function call would lead to a truncation of the id value in decimal notation. Set an explicit maximum id parameter in the id allocation function call. The id allocation function defines the maximum allocated id value as the maximum id parameter value minus one. Therefore, HCI_MAX_ID is defined as 10000. Signed-off-by: Itay Iellin Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_core.c | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8abd082453263..62d7b81b1cb74 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -36,6 +36,9 @@ /* HCI priority */ #define HCI_PRIO_MAX 7 +/* HCI maximum id value */ +#define HCI_MAX_ID 10000 + /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b4782a6c1025d..45c2dd2e15905 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: - id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL); break; case HCI_AMP: - id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL); break; default: return -EINVAL; @@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - sprintf(hdev->name, "hci%d", id); + snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); From 00832b1d1a393dfb1b9491d085e5b27e8c25d103 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 11 May 2022 11:08:29 +0800 Subject: [PATCH 347/491] net: ethernet: mediatek: ppe: fix wrong size passed to memset() 'foe_table' is a pointer, the real size of struct mtk_foe_entry should be pass to memset(). Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Signed-off-by: Yang Yingliang Acked-by: Felix Fietkau Link: https://lore.kernel.org/r/20220511030829.3308094-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 3ad10c793308e..66298e2235c91 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 }; int i, k; - memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table)); + memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table)); if (!IS_ENABLED(CONFIG_SOC_MT7621)) return; From 6b77c06655b8a749c1a3d9ebc51e9717003f7e5a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 May 2022 20:17:51 -0700 Subject: [PATCH 348/491] net: bcmgenet: Check for Wake-on-LAN interrupt probe deferral The interrupt controller supplying the Wake-on-LAN interrupt line maybe modular on some platforms (irq-bcm7038-l1.c) and might be probed at a later time than the GENET driver. We need to specifically check for -EPROBE_DEFER and propagate that error to ensure that we eventually fetch the interrupt descriptor. Fixes: 9deb48b53e7f ("bcmgenet: add WOL IRQ check") Fixes: 5b1f0e62941b ("net: bcmgenet: Avoid touching non-existent interrupt") Signed-off-by: Florian Fainelli Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20220511031752.2245566-1-f.fainelli@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index bf1ec8fdc2adc..e87e46c47387e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3999,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } priv->wol_irq = platform_get_irq_optional(pdev, 2); + if (priv->wol_irq == -EPROBE_DEFER) { + err = priv->wol_irq; + goto err; + } priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { From 6fed53de560768bde6d701a7c79c253b45b259e3 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Tue, 10 May 2022 21:51:48 +0800 Subject: [PATCH 349/491] drm/vc4: hdmi: Fix build error for implicit function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_connector_detect’: drivers/gpu/drm/vc4/vc4_hdmi.c:228:7: error: implicit declaration of function ‘gpiod_get_value_cansleep’; did you mean ‘gpio_get_value_cansleep’? [-Werror=implicit-function-declaration] if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) ^~~~~~~~~~~~~~~~~~~~~~~~ gpio_get_value_cansleep CC [M] drivers/gpu/drm/vc4/vc4_validate.o CC [M] drivers/gpu/drm/vc4/vc4_v3d.o CC [M] drivers/gpu/drm/vc4/vc4_validate_shaders.o CC [M] drivers/gpu/drm/vc4/vc4_debugfs.o drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_bind’: drivers/gpu/drm/vc4/vc4_hdmi.c:2883:23: error: implicit declaration of function ‘devm_gpiod_get_optional’; did you mean ‘devm_clk_get_optional’? [-Werror=implicit-function-declaration] vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); ^~~~~~~~~~~~~~~~~~~~~~~ devm_clk_get_optional drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: error: ‘GPIOD_IN’ undeclared (first use in this function); did you mean ‘GPIOF_IN’? vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); ^~~~~~~~ GPIOF_IN drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: note: each undeclared identifier is reported only once for each function it appears in cc1: all warnings being treated as errors Fixes: 6800234ceee0 ("drm/vc4: hdmi: Convert to gpiod") Signed-off-by: Hui Tang Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220510135148.247719-1-tanghui20@huawei.com --- drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 6c58b0fd13fbb..98b78ec6b37d6 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include From 2de7689c7caa55949da5a56be02ec41a86ac1725 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 11 May 2022 17:20:30 +0100 Subject: [PATCH 350/491] arm64: cpufeature: remove duplicate ID_AA64ISAR2_EL1 entry The ID register table should have one entry per ID register but currently has two entries for ID_AA64ISAR2_EL1. Only one entry has an override, and get_arm64_ftr_reg() can end up choosing the other, causing the override to be ignored. Fix this by removing the duplicate entry. While here, also make the check in sort_ftr_regs() more strict so that duplicate entries can't be added in the future. Fixes: def8c222f054 ("arm64: Add support of PAuth QARMA3 architected algorithm") Signed-off-by: Kristina Martsenko Reviewed-by: Vladimir Murzin Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220511162030.1403386-1-kristina.martsenko@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c4..2cb9cc9e0eff1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -654,7 +654,6 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), - ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, &id_aa64isar2_override), @@ -810,7 +809,7 @@ static void __init sort_ftr_regs(void) * to sys_id for subsequent binary search in get_arm64_ftr_reg() * to work correctly. */ - BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); + BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id); } } From 810c2f0a3f86158c1e02e74947b66d811473434a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 11 May 2022 14:57:47 +0300 Subject: [PATCH 351/491] mlxsw: Avoid warning during ip6gre device removal IPv6 addresses which are used for tunnels are stored in a hash table with reference counting. When a new GRE tunnel is configured, the driver is notified and configures it in hardware. Currently, any change in the tunnel is not applied in the driver. It means that if the remote address is changed, the driver is not aware of this change and the first address will be used. This behavior results in a warning [1] in scenarios such as the following: # ip link add name gre1 type ip6gre local 2000::3 remote 2000::fffe tos inherit ttl inherit # ip link set name gre1 type ip6gre local 2000::3 remote 2000::ffff ttl inherit # ip link delete gre1 The change of the address is not applied in the driver. Currently, the driver uses the remote address which is stored in the 'parms' of the overlay device. When the tunnel is removed, the new IPv6 address is used, the driver tries to release it, but as it is not aware of the change, this address is not configured and it warns about releasing non existing IPv6 address. Fix it by using the IPv6 address which is cached in the IPIP entry, this address is the last one that the driver used, so even in cases such the above, the first address will be released, without any warning. [1]: WARNING: CPU: 1 PID: 2197 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:2920 mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] ... CPU: 1 PID: 2197 Comm: ip Not tainted 5.17.0-rc8-custom-95062-gc1e5ded51a9a #84 Hardware name: Mellanox Technologies Ltd. MSN4700/VMOD0010, BIOS 5.11 07/12/2021 RIP: 0010:mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] ... Call Trace: mlxsw_sp2_ipip_rem_addr_unset_gre6+0xf1/0x120 [mlxsw_spectrum] mlxsw_sp_netdevice_ipip_ol_event+0xdb/0x640 [mlxsw_spectrum] mlxsw_sp_netdevice_event+0xc4/0x850 [mlxsw_spectrum] raw_notifier_call_chain+0x3c/0x50 call_netdevice_notifiers_info+0x2f/0x80 unregister_netdevice_many+0x311/0x6d0 rtnl_dellink+0x136/0x360 rtnetlink_rcv_msg+0x12f/0x380 netlink_rcv_skb+0x49/0xf0 netlink_unicast+0x233/0x340 netlink_sendmsg+0x202/0x440 ____sys_sendmsg+0x1f3/0x220 ___sys_sendmsg+0x70/0xb0 __sys_sendmsg+0x54/0xa0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: e846efe2737b ("mlxsw: spectrum: Add hash table for IPv6 address mapping") Reported-by: Maksym Yaremchuk Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220511115747.238602-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 01cf5a6a26bd3..a2ee695a3f178 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -568,10 +568,8 @@ static int mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { - struct __ip6_tnl_parm parms6; - - parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); - return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr, + return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, + &ipip_entry->parms.daddr.addr6, &ipip_entry->dip_kvdl_index); } @@ -579,10 +577,7 @@ static void mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_entry *ipip_entry) { - struct __ip6_tnl_parm parms6; - - parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); - mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr); + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); } static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { From 51f559d66527e238f9a5f82027bff499784d4eac Mon Sep 17 00:00:00 2001 From: Shreyas K K Date: Thu, 12 May 2022 16:31:34 +0530 Subject: [PATCH 352/491] arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs Add KRYO4XX gold/big cores to the list of CPUs that need the repeat TLBI workaround. Apply this to the affected KRYO4XX cores (rcpe to rfpe). The variant and revision bits are implementation defined and are different from the their Cortex CPU counterparts on which they are based on, i.e., (r0p0 to r3p0) is equivalent to (rcpe to rfpe). Signed-off-by: Shreyas K K Reviewed-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/20220512110134.12179-1-quic_shrekk@quicinc.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 3 +++ arch/arm64/kernel/cpu_errata.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 466cb9e89047f..d27db84d585ed 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -189,6 +189,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 | ++----------------+-----------------+-----------------+-----------------------------+ + +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4c9b5b4b7a0bc..a0f3d0aaa3c53 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif {}, From 2d2d5cb6ca8424fa849ebb4edb8e8022c13860c7 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 12 May 2022 02:18:33 -0700 Subject: [PATCH 353/491] io_uring: fix ordering of args in io_uring_queue_async_work Fix arg ordering in TP_ARGS macro, which fixes the output. Fixes: 502c87d65564c ("io-uring: Make tracepoints consistent.") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220512091834.728610-2-dylany@fb.com Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index cddf5b6fbeb45..80d2588a090c4 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -147,7 +147,7 @@ TRACE_EVENT(io_uring_queue_async_work, TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode, unsigned int flags, struct io_wq_work *work, int rw), - TP_ARGS(ctx, req, user_data, flags, opcode, work, rw), + TP_ARGS(ctx, req, user_data, opcode, flags, work, rw), TP_STRUCT__entry ( __field( void *, ctx ) From b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 May 2022 14:51:22 +0000 Subject: [PATCH 354/491] KVM: x86/mmu: Update number of zapped pages even if page list is stable When zapping obsolete pages, update the running count of zapped pages regardless of whether or not the list has become unstable due to zapping a shadow page with its own child shadow pages. If the VM is backed by mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping the batch count and thus without yielding. In the worst case scenario, this can cause a soft lokcup. watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [dirty_log_perf_:13020] RIP: 0010:workingset_activation+0x19/0x130 mark_page_accessed+0x266/0x2e0 kvm_set_pfn_accessed+0x31/0x40 mmu_spte_clear_track_bits+0x136/0x1c0 drop_spte+0x1a/0xc0 mmu_page_zap_pte+0xef/0x120 __kvm_mmu_prepare_zap_page+0x205/0x5e0 kvm_mmu_zap_all_fast+0xd7/0x190 kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10 kvm_page_track_flush_slot+0x5c/0x80 kvm_arch_flush_shadow_memslot+0xe/0x10 kvm_set_memslot+0x1a8/0x5d0 __kvm_set_memory_region+0x337/0x590 kvm_vm_ioctl+0xb08/0x1040 Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""") Reported-by: David Matlack Reviewed-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220511145122.3133334-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 311e4e1d7870e..56ebc4fb7f917 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5665,6 +5665,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5696,11 +5697,12 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* From 4031cd95cba70c72e4cadc2d46624bcd31e5a6c0 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Tue, 10 May 2022 13:13:00 +0800 Subject: [PATCH 355/491] usb: typec: tcpci_mt6360: Update for BMC PHY setting Update MT6360 BMC PHY Tx/Rx setting for the compatibility. Macpaul reported this CtoDP cable attention message cannot be received from MT6360 TCPC. But actually, attention message really sent from UFP_D device. After RD's comment, there may be BMC PHY Tx/Rx setting causes this issue. Below's the detailed TCPM log and DP attention message didn't received from 6360 TCPCI. [ 1206.367775] Identity: 0000:0000.0000 [ 1206.416570] Alternate mode 0: SVID 0xff01, VDO 1: 0x00000405 [ 1206.447378] AMS DFP_TO_UFP_ENTER_MODE start [ 1206.447383] PD TX, header: 0x1d6f [ 1206.449393] PD TX complete, status: 0 [ 1206.454110] PD RX, header: 0x184f [1] [ 1206.456867] Rx VDM cmd 0xff018144 type 1 cmd 4 len 1 [ 1206.456872] AMS DFP_TO_UFP_ENTER_MODE finished [ 1206.456873] cc:=4 [ 1206.473100] AMS STRUCTURED_VDMS start [ 1206.473103] PD TX, header: 0x2f6f [ 1206.475397] PD TX complete, status: 0 [ 1206.480442] PD RX, header: 0x2a4f [1] [ 1206.483145] Rx VDM cmd 0xff018150 type 1 cmd 16 len 2 [ 1206.483150] AMS STRUCTURED_VDMS finished [ 1206.483151] cc:=4 [ 1206.505643] AMS STRUCTURED_VDMS start [ 1206.505646] PD TX, header: 0x216f [ 1206.507933] PD TX complete, status: 0 [ 1206.512664] PD RX, header: 0x1c4f [1] [ 1206.515456] Rx VDM cmd 0xff018151 type 1 cmd 17 len 1 [ 1206.515460] AMS STRUCTURED_VDMS finished [ 1206.515461] cc:=4 Fixes: e1aefcdd394fd ("usb typec: mt6360: Add support for mt6360 Type-C driver") Cc: stable Reported-by: Macpaul Lin Tested-by: Macpaul Lin Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: ChiYuan Huang Signed-off-by: Fabien Parent Link: https://lore.kernel.org/r/1652159580-30959-1-git-send-email-u0084500@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_mt6360.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci_mt6360.c b/drivers/usb/typec/tcpm/tcpci_mt6360.c index f1bd9e09bc87f..8a952eaf90163 100644 --- a/drivers/usb/typec/tcpm/tcpci_mt6360.c +++ b/drivers/usb/typec/tcpm/tcpci_mt6360.c @@ -15,6 +15,9 @@ #include "tcpci.h" +#define MT6360_REG_PHYCTRL1 0x80 +#define MT6360_REG_PHYCTRL3 0x82 +#define MT6360_REG_PHYCTRL7 0x86 #define MT6360_REG_VCONNCTRL1 0x8C #define MT6360_REG_MODECTRL2 0x8F #define MT6360_REG_SWRESET 0xA0 @@ -22,6 +25,8 @@ #define MT6360_REG_DRPCTRL1 0xA2 #define MT6360_REG_DRPCTRL2 0xA3 #define MT6360_REG_I2CTORST 0xBF +#define MT6360_REG_PHYCTRL11 0xCA +#define MT6360_REG_RXCTRL1 0xCE #define MT6360_REG_RXCTRL2 0xCF #define MT6360_REG_CTDCTRL2 0xEC @@ -106,6 +111,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata) if (ret) return ret; + /* BMC PHY */ + ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_PHYCTRL3, 0x82); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36); + if (ret) + return ret; + + ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8); + if (ret) + return ret; + /* Set shipping mode off, AUTOIDLE on */ return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A); } From 5f0b5f4d50fa0faa8c76ef9d42a42e8d43f98b44 Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Sun, 8 May 2022 23:02:47 +0800 Subject: [PATCH 356/491] usb: gadget: fix race when gadget driver register via ioctl The usb_gadget_register_driver can be called multi time by to threads via USB_RAW_IOCTL_RUN ioctl syscall, which will lead to multiple registrations. Call trace: driver_register+0x220/0x3a0 drivers/base/driver.c:171 usb_gadget_register_driver_owner+0xfb/0x1e0 drivers/usb/gadget/udc/core.c:1546 raw_ioctl_run drivers/usb/gadget/legacy/raw_gadget.c:513 [inline] raw_ioctl+0x1883/0x2730 drivers/usb/gadget/legacy/raw_gadget.c:1220 ioctl USB_RAW_IOCTL_RUN This routine allows two processes to register the same driver instance via ioctl syscall. which lead to a race condition. Please refer to the following scenarios. T1 T2 ------------------------------------------------------------------ usb_gadget_register_driver_owner driver_register driver_register driver_find driver_find bus_add_driver bus_add_driver priv alloced drv->p = priv; kobject_init_and_add // refcount = 1; //couldn't find an available UDC or it's busy priv alloced drv->priv = priv; kobject_init_and_add ---> refcount = 1 <------ // register success ===================== another ioctl/process ====================== driver_register driver_find k = kset_find_obj() ---> refcount = 2 <------ driver_unregister // drv->p become T2's priv ---> refcount = 1 <------ kobject_put(k) ---> refcount = 0 <------ return priv->driver; --------UAF here---------- There will be UAF in this scenario. We can fix it by adding a new STATE_DEV_REGISTERING device state to avoid double register. Reported-by: syzbot+dc7c3ca638e773db07f6@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/000000000000e66c2805de55b15a@google.com/ Reviewed-by: Andrey Konovalov Signed-off-by: Schspa Shi Link: https://lore.kernel.org/r/20220508150247.38204-1-schspa@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 8d40a1f2ec57d..e9440f7bf019d 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -145,6 +145,7 @@ enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, + STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED @@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) ret = -EINVAL; goto out_unlock; } + dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_probe_driver(&dev->driver); From c237566b78ad8c72bc0431c5d6171db8d12e6f94 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 12 May 2022 14:49:30 +0800 Subject: [PATCH 357/491] usb: xhci-mtk: fix fs isoc's transfer error Due to the scheduler allocates the optimal bandwidth for FS ISOC endpoints, this may be not enough actually and causes data transfer error, so come up with an estimate that is no less than the worst case bandwidth used for any one mframe, but may be an over-estimate. Fixes: 451d3912586a ("usb: xhci-mtk: update fs bus bandwidth by bw_budget_table") Cc: stable@vger.kernel.org Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220512064931.31670-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index f3139ce7b0a93..953d2cd1d4cc1 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -464,7 +464,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset) */ for (j = 0; j < sch_ep->num_budget_microframes; j++) { k = XHCI_MTK_BW_INDEX(base + j); - tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j]; + tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe; if (tmp > FS_PAYLOAD_MAX) return -ESCH_BW_OVERFLOW; } @@ -538,19 +538,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset) static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used) { struct mu3h_sch_tt *tt = sch_ep->sch_tt; + int bw_updated; u32 base; - int i, j, k; + int i, j; + + bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1); for (i = 0; i < sch_ep->num_esit; i++) { base = sch_ep->offset + i * sch_ep->esit; - for (j = 0; j < sch_ep->num_budget_microframes; j++) { - k = XHCI_MTK_BW_INDEX(base + j); - if (used) - tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j]; - else - tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j]; - } + for (j = 0; j < sch_ep->num_budget_microframes; j++) + tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated; } if (used) From 1645eee0d7f623660e6ce0f1aef4591788a0c9da Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 12 May 2022 14:49:31 +0800 Subject: [PATCH 358/491] usb: xhci-mtk: remove bandwidth budget table The bandwidth budget table is introduced to trace ideal bandwidth used by each INT/ISOC endpoint, but in fact the endpoint may consume more bandwidth and cause data transfer error, so it's better to leave some margin. Obviously it's difficult to find the best margin for all cases, instead take use of the worst-case scenario. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220512064931.31670-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 74 ++++++--------------------------- drivers/usb/host/xhci-mtk.h | 2 - 2 files changed, 12 insertions(+), 64 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 953d2cd1d4cc1..06a6b19acaae6 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -19,11 +19,6 @@ #define HS_BW_BOUNDARY 6144 /* usb2 spec section11.18.1: at most 188 FS bytes per microframe */ #define FS_PAYLOAD_MAX 188 -/* - * max number of microframes for split transfer, - * for fs isoc in : 1 ss + 1 idle + 7 cs - */ -#define TT_MICROFRAMES_MAX 9 #define DBG_BUF_EN 64 @@ -242,28 +237,17 @@ static void drop_tt(struct usb_device *udev) static struct mu3h_sch_ep_info * create_sch_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev, - struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx) + struct usb_host_endpoint *ep) { struct mu3h_sch_ep_info *sch_ep; struct mu3h_sch_bw_info *bw_info; struct mu3h_sch_tt *tt = NULL; - u32 len_bw_budget_table; bw_info = get_bw_info(mtk, udev, ep); if (!bw_info) return ERR_PTR(-ENODEV); - if (is_fs_or_ls(udev->speed)) - len_bw_budget_table = TT_MICROFRAMES_MAX; - else if ((udev->speed >= USB_SPEED_SUPER) - && usb_endpoint_xfer_isoc(&ep->desc)) - len_bw_budget_table = get_esit(ep_ctx); - else - len_bw_budget_table = 1; - - sch_ep = kzalloc(struct_size(sch_ep, bw_budget_table, - len_bw_budget_table), - GFP_KERNEL); + sch_ep = kzalloc(sizeof(*sch_ep), GFP_KERNEL); if (!sch_ep) return ERR_PTR(-ENOMEM); @@ -295,8 +279,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, u32 mult; u32 esit_pkts; u32 max_esit_payload; - u32 *bwb_table = sch_ep->bw_budget_table; - int i; ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2)); maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2)); @@ -332,7 +314,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, */ sch_ep->pkts = max_burst + 1; sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; - bwb_table[0] = sch_ep->bw_cost_per_microframe; } else if (sch_ep->speed >= USB_SPEED_SUPER) { /* usb3_r1 spec section4.4.7 & 4.4.8 */ sch_ep->cs_count = 0; @@ -349,7 +330,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) { sch_ep->pkts = esit_pkts; sch_ep->num_budget_microframes = 1; - bwb_table[0] = maxpkt * sch_ep->pkts; } if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) { @@ -366,15 +346,8 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, DIV_ROUND_UP(esit_pkts, sch_ep->pkts); sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1); - sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; - - for (i = 0; i < sch_ep->num_budget_microframes - 1; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - - /* last one <= bw_cost_per_microframe */ - bwb_table[i] = maxpkt * esit_pkts - - i * sch_ep->bw_cost_per_microframe; } + sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; } else if (is_fs_or_ls(sch_ep->speed)) { sch_ep->pkts = 1; /* at most one packet for each microframe */ @@ -384,28 +357,7 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, */ sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX); sch_ep->num_budget_microframes = sch_ep->cs_count; - sch_ep->bw_cost_per_microframe = - (maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX; - - /* init budget table */ - if (ep_type == ISOC_OUT_EP) { - for (i = 0; i < sch_ep->num_budget_microframes; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - } else if (ep_type == INT_OUT_EP) { - /* only first one consumes bandwidth, others as zero */ - bwb_table[0] = sch_ep->bw_cost_per_microframe; - } else { /* INT_IN_EP or ISOC_IN_EP */ - bwb_table[0] = 0; /* start split */ - bwb_table[1] = 0; /* idle */ - /* - * due to cs_count will be updated according to cs - * position, assign all remainder budget array - * elements as @bw_cost_per_microframe, but only first - * @num_budget_microframes elements will be used later - */ - for (i = 2; i < TT_MICROFRAMES_MAX; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - } + sch_ep->bw_cost_per_microframe = min_t(u32, maxpkt, FS_PAYLOAD_MAX); } } @@ -422,7 +374,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw, for (j = 0; j < sch_ep->num_budget_microframes; j++) { k = XHCI_MTK_BW_INDEX(base + j); - bw = sch_bw->bus_bw[k] + sch_ep->bw_budget_table[j]; + bw = sch_bw->bus_bw[k] + sch_ep->bw_cost_per_microframe; if (bw > max_bw) max_bw = bw; } @@ -433,18 +385,16 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw, static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw, struct mu3h_sch_ep_info *sch_ep, bool used) { + int bw_updated; u32 base; - int i, j, k; + int i, j; + + bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1); for (i = 0; i < sch_ep->num_esit; i++) { base = sch_ep->offset + i * sch_ep->esit; - for (j = 0; j < sch_ep->num_budget_microframes; j++) { - k = XHCI_MTK_BW_INDEX(base + j); - if (used) - sch_bw->bus_bw[k] += sch_ep->bw_budget_table[j]; - else - sch_bw->bus_bw[k] -= sch_ep->bw_budget_table[j]; - } + for (j = 0; j < sch_ep->num_budget_microframes; j++) + sch_bw->bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated; } } @@ -708,7 +658,7 @@ static int add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, xhci_dbg(xhci, "%s %s\n", __func__, decode_ep(ep, udev->speed)); - sch_ep = create_sch_ep(mtk, udev, ep, ep_ctx); + sch_ep = create_sch_ep(mtk, udev, ep); if (IS_ERR_OR_NULL(sch_ep)) return -ENOMEM; diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index ffd4b493b4ba7..1174a510dd388 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -83,7 +83,6 @@ struct mu3h_sch_bw_info { * times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets * according to @pkts and @repeat. normal mode is used by * default - * @bw_budget_table: table to record bandwidth budget per microframe */ struct mu3h_sch_ep_info { u32 esit; @@ -109,7 +108,6 @@ struct mu3h_sch_ep_info { u32 pkts; u32 cs_count; u32 burst_mode; - u32 bw_budget_table[]; }; #define MU3C_U3_PORT_MAX 4 From b7be130c5d52e5224ac7d89568737b37b4c4b785 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 May 2022 19:17:31 -0700 Subject: [PATCH 359/491] net: dsa: bcm_sf2: Fix Wake-on-LAN with mac_link_down() After commit 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of state->link") the interface suspend path would call our mac_link_down() call back which would forcibly set the link down, thus preventing Wake-on-LAN packets from reaching our management port. Fix this by looking at whether the port is enabled for Wake-on-LAN and not clearing the link status in that case to let packets go through. Fixes: 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of state->link") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220512021731.2494261-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cf82b1fa97252..87e81c636339f 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -809,6 +809,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg, offset; + if (priv->wol_ports_mask & BIT(port)) + return; + if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { if (priv->type == BCM4908_DEVICE_ID || priv->type == BCM7445_DEVICE_ID) From f3c46e41b32b6266cf60b0985c61748f53bf1c61 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Thu, 12 May 2022 11:08:20 +0800 Subject: [PATCH 360/491] net/smc: non blocking recvmsg() return -EAGAIN when no data and signal_pending Non blocking sendmsg will return -EAGAIN when any signal pending and no send space left, while non blocking recvmsg return -EINTR when signal pending and no data received. This may makes confused. As TCP returns -EAGAIN in the conditions described above. Align the behavior of smc with TCP. Fixes: 846e344eb722 ("net/smc: add receive timeout check") Signed-off-by: Guangguan Wang Reviewed-by: Tony Lu Acked-by: Karsten Graul Link: https://lore.kernel.org/r/20220512030820.73848-1-guangguan.wang@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 51e8eb2933ff4..338b9ef806e82 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, } break; } + if (!timeo) + return -EAGAIN; if (signal_pending(current)) { read_done = sock_intr_errno(timeo); break; } - if (!timeo) - return -EAGAIN; } if (!smc_rx_data_available(conn)) { From 1fa89ffbc04545b7582518e57f4b63e2a062870f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 12 May 2022 05:47:09 +0000 Subject: [PATCH 361/491] net: sfc: ef10: fix memory leak in efx_ef10_mtd_probe() In the NIC ->probe() callback, ->mtd_probe() callback is called. If NIC has 2 ports, ->probe() is called twice and ->mtd_probe() too. In the ->mtd_probe(), which is efx_ef10_mtd_probe() it allocates and initializes mtd partiion. But mtd partition for sfc is shared data. So that allocated mtd partition data from last called efx_ef10_mtd_probe() will not be used. Therefore it must be freed. But it doesn't free a not used mtd partition data in efx_ef10_mtd_probe(). kmemleak reports: unreferenced object 0xffff88811ddb0000 (size 63168): comm "systemd-udevd", pid 265, jiffies 4294681048 (age 348.586s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_order_trace+0x19/0x120 [] __kmalloc+0x20e/0x250 [] efx_ef10_mtd_probe+0x11f/0x270 [sfc] [] efx_pci_probe.cold.17+0x3df/0x53d [sfc] [] local_pci_probe+0xdc/0x170 [] pci_device_probe+0x235/0x680 [] really_probe+0x1c2/0x8f0 [] __driver_probe_device+0x2ab/0x460 [] driver_probe_device+0x4a/0x120 [] __driver_attach+0x16e/0x320 [] bus_for_each_dev+0x110/0x190 [] bus_add_driver+0x39e/0x560 [] driver_register+0x18e/0x310 [] 0xffffffffc02e2055 [] do_one_initcall+0xc3/0x450 [] do_init_module+0x1b4/0x700 Acked-by: Martin Habets Fixes: 8127d661e77f ("sfc: Add support for Solarflare SFC9100 family") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20220512054709.12513-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 50d535981a35f..f8edb3f1b73ad 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) n_parts++; } + if (!n_parts) { + kfree(parts); + return 0; + } + rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); fail: if (rc) From 3740651bf7e200109dd42d5b2fb22226b26f960a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 12 May 2022 12:18:30 +0300 Subject: [PATCH 362/491] tls: Fix context leak on tls_device_down The commit cited below claims to fix a use-after-free condition after tls_device_down. Apparently, the description wasn't fully accurate. The context stayed alive, but ctx->netdev became NULL, and the offload was torn down without a proper fallback, so a bug was present, but a different kind of bug. Due to misunderstanding of the issue, the original patch dropped the refcount_dec_and_test line for the context to avoid the alleged premature deallocation. That line has to be restored, because it matches the refcount_inc_not_zero from the same function, otherwise the contexts that survived tls_device_down are leaked. This patch fixes the described issue by restoring refcount_dec_and_test. After this change, there is no leak anymore, and the fallback to software kTLS still works. Fixes: c55dcdd435aa ("net/tls: Fix use-after-free after the TLS device goes down and up") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220512091830.678684-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index af875ad4a822d..3919fe2c58c5c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1347,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev) /* Device contexts for RX and TX will be freed in on sk_destruct * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + * Now release the ref taken above. */ + if (refcount_dec_and_test(&ctx->refcount)) + tls_device_free_ctx(ctx); } up_write(&device_offload_lock); From 8b3b2392ed68bcd17c7eb84ca615ce1e5f115b99 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 12 May 2022 20:09:56 +0200 Subject: [PATCH 363/491] ALSA: hda/realtek: Add quirk for TongFang devices with pop noise When audio stops playing there is an audible "pop"-noise when using headphones on the TongFang GMxMRxx, GKxNRxx, GMxZGxx, GMxTGxx and GMxAGxx. This quirk fixes this mostly. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220512180956.281804-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 33c439578a610..ce2cb1986677b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9341,6 +9341,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), + SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), From 16287397ec5c08aa58db6acf7dbc55470d78087d Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 3 May 2022 13:50:10 +0200 Subject: [PATCH 364/491] crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ The commit referenced in the Fixes tag removed the 'break' from the else branch in qcom_rng_read(), causing an infinite loop whenever 'max' is not a multiple of WORD_SZ. This can be reproduced e.g. by running: kcapi-rng -b 67 >/dev/null There are many ways to fix this without adding back the 'break', but they all seem more awkward than simply adding it back, so do just that. Tested on a machine with Qualcomm Amberwing processor. Fixes: a680b1832ced ("crypto: qcom-rng - ensure buffer for generate is completely filled") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Reviewed-by: Brian Masney Signed-off-by: Herbert Xu --- drivers/crypto/qcom-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 11f30fd48c141..031b5f701a0a3 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); + break; } } while (currsize < max); From 280abe14b6e0a38de9cc86fe6a019523aadd8f70 Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Mon, 9 May 2022 11:06:37 +0200 Subject: [PATCH 365/491] x86/mm: Fix marking of unused sub-pmd ranges The unused part precedes the new range spanned by the start, end parameters of vmemmap_use_new_sub_pmd(). This means it actually goes from ALIGN_DOWN(start, PMD_SIZE) up to start. Use the correct address when applying the mark using memset. Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Thomas Gleixner Reviewed-by: Oscar Salvador Reviewed-by: David Hildenbrand Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220509090637.24152-2-ken@codelabs.ch --- arch/x86/mm/init_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 96d34ebb20a9e..e2942335d1437 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -902,6 +902,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { + const unsigned long page = ALIGN_DOWN(start, PMD_SIZE); + vmemmap_flush_unused_pmd(); /* @@ -914,8 +916,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long * Mark with PAGE_UNUSED the unused parts of the new memmap range */ if (!IS_ALIGNED(start, PMD_SIZE)) - memset((void *)start, PAGE_UNUSED, - start - ALIGN_DOWN(start, PMD_SIZE)); + memset((void *)page, PAGE_UNUSED, start - page); /* * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of From 30b338ff7998b6ed7a90815870cd5db725f87168 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 May 2022 10:10:31 -0500 Subject: [PATCH 366/491] net: ipa: certain dropped packets aren't accounted for If an RX endpoint receives packets containing status headers, and a packet in the buffer is not dropped, ipa_endpoint_skb_copy() is responsible for wrapping the packet data in an SKB and forwarding it to ipa_modem_skb_rx() for further processing. If ipa_endpoint_skb_copy() gets a null pointer from build_skb(), it just returns early. But in the process it doesn't record that as a dropped packet in the network device statistics. Instead, call ipa_modem_skb_rx() whether or not the SKB pointer is NULL; that function ensures the statistics are properly updated. Fixes: 1b65bbcc9a710 ("net: ipa: skip SKB copy if no netdev") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_endpoint.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 888e94278a84f..cea7b2e2ce969 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1150,13 +1150,12 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint, return; skb = __dev_alloc_skb(len, GFP_ATOMIC); - if (!skb) - return; - - /* Copy the data into the socket buffer and receive it */ - skb_put(skb, len); - memcpy(skb->data, data, len); - skb->truesize += extra; + if (skb) { + /* Copy the data into the socket buffer and receive it */ + skb_put(skb, len); + memcpy(skb->data, data, len); + skb->truesize += extra; + } ipa_modem_skb_rx(endpoint->netdev, skb); } From d8290cbe1111105f92f0c8ab455bec8bf98d0630 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 May 2022 10:10:32 -0500 Subject: [PATCH 367/491] net: ipa: record proper RX transaction count Each time we are notified that some number of transactions on an RX channel has completed, we record the number of bytes that have been transferred since the previous notification. We also track the number of transactions completed, but that is not currently being calculated correctly; we're currently counting the number of such notifications, but each notification can represent many transaction completions. Fix this. Fixes: 650d1603825d8 ("soc: qcom: ipa: the generic software interface") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/gsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index bc981043cc808..a701178a1d139 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1367,9 +1367,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) struct gsi_event *event_done; struct gsi_event *event; struct gsi_trans *trans; + u32 trans_count = 0; u32 byte_count = 0; - u32 old_index; u32 event_avail; + u32 old_index; trans_info = &channel->trans_info; @@ -1390,6 +1391,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) do { trans->len = __le16_to_cpu(event->len); byte_count += trans->len; + trans_count++; /* Move on to the next event and transaction */ if (--event_avail) @@ -1401,7 +1403,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) /* We record RX bytes when they are received */ channel->byte_count += byte_count; - channel->trans_count++; + channel->trans_count += trans_count; } /* Initialize a ring, including allocating DMA memory for its entries */ From 8d017efb1eaad69f148bb99ee2c7020abdedfad1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 May 2022 10:10:33 -0500 Subject: [PATCH 368/491] net: ipa: get rid of a duplicate initialization In ipa_qmi_ready(), the "ipa" local variable is set when initialized, but then set again just before it's first used. One or the other is enough, so get rid of the first one. References: https://lore.kernel.org/lkml/200de1bd-0f01-c334-ca18-43eed783dfac@intel.com/ Reported-by: kernel test robot Fixes: 530f9216a953 ("soc: qcom: ipa: AP/modem communications") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_qmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index 90f3aec55b365..ec010cf2e816a 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -125,7 +125,7 @@ static void ipa_qmi_indication(struct ipa_qmi *ipa_qmi) */ static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi) { - struct ipa *ipa = container_of(ipa_qmi, struct ipa, qmi); + struct ipa *ipa; int ret; /* We aren't ready until the modem and microcontroller are */ From 04c494e68a1340cb5c70d4704ac32d863dc64293 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 May 2022 14:14:56 -0700 Subject: [PATCH 369/491] Revert "tcp/dccp: get rid of inet_twsk_purge()" This reverts commits: 0dad4087a86a2cbe177404dc73f18ada26a2c390 ("tcp/dccp: get rid of inet_twsk_purge()") d507204d3c5cc57d9a8bdf0a477615bb59ea1611 ("tcp/dccp: add tw->tw_bslot") As Leonard pointed out, a newly allocated netns can happen to reuse a freed 'struct net'. While TCP TW timers were covered by my patches, other things were not: 1) Lookups in rx path (INET_MATCH() and INET6_MATCH()), as they look at 4-tuple plus the 'struct net' pointer. 2) /proc/net/tcp[6] and inet_diag, same reason. 3) hashinfo->bhash[], same reason. Fixing all this seems risky, lets instead revert. In the future, we might have a per netns tcp hash table, or a per netns list of timewait sockets... Fixes: 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()") Signed-off-by: Eric Dumazet Reported-by: Leonard Crestez Tested-by: Leonard Crestez Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 +- net/dccp/ipv4.c | 6 ++++ net/dccp/ipv6.c | 6 ++++ net/ipv4/inet_timewait_sock.c | 58 ++++++++++++++++++++++++++++---- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 6 ++++ 6 files changed, 73 insertions(+), 8 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 463ae5d33eb09..5b47545f22d39 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -71,7 +71,6 @@ struct inet_timewait_sock { tw_tos : 8; u32 tw_txhash; u32 tw_priority; - u32 tw_bslot; /* bind bucket slot */ struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; @@ -110,6 +109,8 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); + static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae662567a6cb6..0ea29270d7e53 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v4_ctl_sk); } +static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET); +} + static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, + .exit_batch = dccp_v4_exit_batch, .id = &dccp_v4_pernet_id, .size = sizeof(struct dccp_v4_pernet), }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index eab3bd1ee9a0a..fa663518fa0e4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v6_ctl_sk); } +static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET6); +} + static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, + .exit_batch = dccp_v6_exit_batch, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 9e0bbd0265601..0ec501845cb3b 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) spin_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); inet_twsk_bind_unhash(tw, hashinfo); @@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - /* Cache inet_bhashfn(), because 'struct net' might be no longer - * available later in inet_twsk_kill(). - */ - tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num, - hashinfo->bhash_size); - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); @@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); + +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_nulls_node *node; + unsigned int slot; + + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + cond_resched(); + rcu_read_lock(); +restart: + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_state != TCP_TIME_WAIT) + continue; + tw = inet_twsk(sk); + if ((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count)) + continue; + + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + continue; + + if (unlikely((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); + goto restart_rcu; + } + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); + } +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f9cec624068df..457f5b5d5d4a9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + list_for_each_entry(net, net_exit_list, exit_list) tcp_fastopen_ctx_destroy(net); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 13678d3908fac..faaddaf43c90b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } +static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, AF_INET6); +} + static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) From 370704e707a5f2d3c9a1d4ed8bd8cd67507d7bb5 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Fri, 13 May 2022 16:58:16 +0530 Subject: [PATCH 370/491] dma-buf: ensure unique directory name for dmabuf stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dmabuf file uses get_next_ino()(through dma_buf_getfile() -> alloc_anon_inode()) to get an inode number and uses the same as a directory name under /sys/kernel/dmabuf/buffers/. This directory is used to collect the dmabuf stats and it is created through dma_buf_stats_setup(). At current, failure to create this directory entry can make the dma_buf_export() to fail. Now, as the get_next_ino() can definitely give a repetitive inode no causing the directory entry creation to fail with -EEXIST. This is a problem on the systems where dmabuf stats functionality is enabled on the production builds can make the dma_buf_export(), though the dmabuf memory is allocated successfully, to fail just because it couldn't create stats entry. This issue we are able to see on the snapdragon system within 13 days where there already exists a directory with inode no "122602" so dma_buf_stats_setup() failed with -EEXIST as it is trying to create the same directory entry. To make the dentry name as unique, use the dmabuf fs specific inode which is based on the simple atomic variable increment. There is tmpfs subsystem too which relies on its own inode generation rather than relying on the get_next_ino() for the same reason of avoiding the duplicate inodes[1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=e809d5f0b5c912fe981dce738f3283b2010665f0 Signed-off-by: Charan Teja Kalla Cc: # 5.15.x+ Reviewed-by: Greg Kroah-Hartman Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1652441296-1986-1-git-send-email-quic_charante@quicinc.com Signed-off-by: Christian König --- drivers/dma-buf/dma-buf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b1e25ae983027..53297a0d9c573 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -407,6 +407,7 @@ static inline int is_dma_buf_file(struct file *file) static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) { + static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); @@ -416,6 +417,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) inode->i_size = dmabuf->size; inode_set_bytes(inode, dmabuf->size); + /* + * The ->i_ino acquired from get_next_ino() is not unique thus + * not suitable for using it as dentry name by dmabuf stats. + * Override ->i_ino with the unique and dmabuffs specific + * value. + */ + inode->i_ino = atomic64_add_return(1, &dmabuf_inode); file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", flags, &dma_buf_fops); if (IS_ERR(file)) From 1d6595b4cd47acfd824550f48f10b54a6f0e93ee Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 2 Mar 2022 10:24:22 -0500 Subject: [PATCH 371/491] drm/vmwgfx: Fix fencing on SVGAv3 Port of the vmwgfx to SVGAv3 lacked support for fencing. SVGAv3 removed FIFO's and replaced them with command buffers and extra registers. The initial version of SVGAv3 lacked support for most advanced features (e.g. 3D) which made fences unnecessary. That is no longer the case, especially as 3D support is being turned on. Switch from FIFO commands and capabilities to command buffers and extra registers to enable fences on SVGAv3. Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20220302152426.885214-5-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 8 ++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 28 ++++++++++++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 26 +++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 +++++--- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index a3bfbb6c3e14a..bf1b394753da6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno) *seqno = atomic_add_return(1, &dev_priv->marker_seq); } while (*seqno == 0); - if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) { + if (!vmw_has_fences(dev_priv)) { /* * Don't request hardware to send a fence. The diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index ea3ecdda561dc..6de0b9ef5c773 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw, outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT); } +static inline bool vmw_has_fences(struct vmw_private *vmw) +{ + if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | + SVGA_CAP_CMD_BUFFERS_2)) != 0) + return true; + return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0; +} + #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 59d6a2dd4c2e4..66cc35dc223e7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence) return container_of(fence->base.lock, struct vmw_fence_manager, lock); } +static u32 vmw_fence_goal_read(struct vmw_private *vmw) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + return vmw_read(vmw, SVGA_REG_FENCE_GOAL); + else + return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL); +} + +static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + vmw_write(vmw, SVGA_REG_FENCE_GOAL, value); + else + vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value); +} + /* * Note on fencing subsystem usage of irqs: * Typically the vmw_fences_update function is called @@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, if (likely(!fman->seqno_valid)) return false; - goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_fence_goal_read(fman->dev_priv); if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP)) return false; @@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, list_for_each_entry(fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; - vmw_fifo_mem_write(fman->dev_priv, - SVGA_FIFO_FENCE_GOAL, - fence->base.seqno); + vmw_fence_goal_write(fman->dev_priv, + fence->base.seqno); break; } } @@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence) if (dma_fence_is_signaled_locked(&fence->base)) return false; - goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_fence_goal_read(fman->dev_priv); if (likely(fman->seqno_valid && goal_seqno - fence->base.seqno < VMW_FENCE_WRAP)) return false; - vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL, - fence->base.seqno); + vmw_fence_goal_write(fman->dev_priv, fence->base.seqno); fman->seqno_valid = true; return true; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index c5191de365ca1..fe4732bf2c9d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -32,6 +32,14 @@ #define VMW_FENCE_WRAP (1 << 24) +static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + return SVGA_IRQFLAG_REG_FENCE_GOAL; + else + return SVGA_IRQFLAG_FENCE_GOAL; +} + /** * vmw_thread_fn - Deferred (process context) irq handler * @@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg) wake_up_all(&dev_priv->fifo_queue); if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE | - SVGA_IRQFLAG_FENCE_GOAL)) && + vmw_irqflag_fence_goal(dev_priv))) && !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending)) ret = IRQ_WAKE_THREAD; @@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv, if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP)) return true; - if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) && - vmw_fifo_idle(dev_priv, seqno)) + if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno)) return true; /** @@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, unsigned long timeout) { struct vmw_fifo_state *fifo_state = dev_priv->fifo; + bool fifo_down = false; uint32_t count = 0; uint32_t signal_seq; @@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, */ if (fifo_idle) { - down_read(&fifo_state->rwsem); if (dev_priv->cman) { ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible, 10*HZ); if (ret) goto out_err; + } else if (fifo_state) { + down_read(&fifo_state->rwsem); + fifo_down = true; } } @@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, } } finish_wait(&dev_priv->fence_queue, &__wait); - if (ret == 0 && fifo_idle) + if (ret == 0 && fifo_idle && fifo_state) vmw_fence_write(dev_priv, signal_seq); wake_up_all(&dev_priv->fence_queue); out_err: - if (fifo_idle) + if (fifo_down) up_read(&fifo_state->rwsem); return ret; @@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) void vmw_goal_waiter_add(struct vmw_private *dev_priv) { - vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv), &dev_priv->goal_queue_waiters); } void vmw_goal_waiter_remove(struct vmw_private *dev_priv) { - vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv), &dev_priv->goal_queue_waiters); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index bbd2f4ec08ec1..93431e8f66060 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, mode_cmd, is_bo_proxy); - /* * vmw_create_bo_proxy() adds a reference that is no longer * needed @@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, ret = vmw_user_lookup_handle(dev_priv, file_priv, mode_cmd->handles[0], &surface, &bo); - if (ret) + if (ret) { + DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", + mode_cmd->handles[0], mode_cmd->handles[0]); goto err_out; + } if (!bo && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { - DRM_ERROR("Surface size cannot exceed %dx%d", + DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, dev_priv->texture_max_height); goto err_out; From 3059d9b9f6aa433a55b9d0d21b566396d5497c33 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 2 Mar 2022 10:24:24 -0500 Subject: [PATCH 372/491] drm/vmwgfx: Initialize drm_mode_fb_cmd2 Transition to drm_mode_fb_cmd2 from drm_mode_fb_cmd left the structure unitialized. drm_mode_fb_cmd2 adds a few additional members, e.g. flags and modifiers which were never initialized. Garbage in those members can cause random failures during the bringup of the fbcon. Initializing the structure fixes random blank screens after bootup due to flags/modifiers mismatches during the fbcon bring up. Fixes: dabdcdc9822a ("drm/vmwgfx: Switch to mode_cmd2") Signed-off-by: Zack Rusin Cc: Daniel Vetter Cc: # v4.10+ Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20220302152426.885214-7-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 8ee34576c7d08..adf17c740656d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -483,7 +483,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par, static int vmw_fb_kms_framebuffer(struct fb_info *info) { - struct drm_mode_fb_cmd2 mode_cmd; + struct drm_mode_fb_cmd2 mode_cmd = {0}; struct vmw_fb_par *par = info->par; struct fb_var_screeninfo *var = &info->var; struct drm_framebuffer *cur_fb; From 21d1d192890ced87f2f04f8f4dea92406e0b162a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 18 Mar 2022 13:43:31 -0400 Subject: [PATCH 373/491] drm/vmwgfx: Disable command buffers on svga3 without gbobjects With very limited vram on svga3 it's difficult to handle all the surface migrations. Without gbobjects, i.e. the ability to store surfaces in guest mobs, there's no reason to support intermediate svga2 features, especially because we can fall back to fb traces and svga3 will never support those in-between features. On svga3 we wither want to use fb traces or screen targets (i.e. gbobjects), nothing in between. This fixes presentation on a lot of fusion/esxi tech previews where the exposed svga3 caps haven't been finalized yet. Signed-off-by: Zack Rusin Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Cc: # v5.14+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20220318174332.440068-5-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index bf1b394753da6..162dfeb1cc5ad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv, */ bool vmw_cmd_supported(struct vmw_private *vmw) { - if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | - SVGA_CAP_CMD_BUFFERS_2)) != 0) - return true; + bool has_cmdbufs = + (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | + SVGA_CAP_CMD_BUFFERS_2)) != 0; + if (vmw_is_svga_v3(vmw)) + return (has_cmdbufs && + (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0); /* * We have FIFO cmd's */ - return vmw->fifo_mem != NULL; + return has_cmdbufs || vmw->fifo_mem != NULL; } From d031a8866e709c9d1ee5537a321b6192b4d2dc5b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 14 Apr 2022 17:52:39 +0200 Subject: [PATCH 374/491] gfs2: Fix filesystem block deallocation for short writes When a write cannot be carried out in full, gfs2_iomap_end() releases blocks that have been allocated for this write but haven't been used. To compute the end of the allocation, gfs2_iomap_end() incorrectly rounded the end of the attempted write down to the next block boundary to arrive at the end of the allocation. It would have to round up, but the end of the allocation is also available as iomap->offset + iomap->length, so just use that instead. In addition, use round_up() for computing the start of the unused range. Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 39080b2d6cf86..b6697333bb2b9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1153,13 +1153,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ - loff_t blockmask = i_blocksize(inode) - 1; - loff_t end = (pos + length) & ~blockmask; + loff_t hstart = round_up(pos + written, i_blocksize(inode)); + loff_t hend = iomap->offset + iomap->length; - pos = (pos + written + blockmask) & ~blockmask; - if (pos < end) { - truncate_pagecache_range(inode, pos, end - 1); - punch_hole(ip, pos, end - pos); + if (hstart < hend) { + truncate_pagecache_range(inode, hstart, hend - 1); + punch_hole(ip, hstart, hend - hstart); } } From 42e4c3bdcae7833eeeaed7bf0c000c2de17dd291 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 27 Apr 2022 13:53:42 +0200 Subject: [PATCH 375/491] gfs2: Variable rename Instead of counting the number of bytes read from the filesystem, functions gfs2_file_direct_read and gfs2_file_read_iter count the number of bytes written into the user buffer. Conversely, functions gfs2_file_direct_write and gfs2_file_buffered_write count the number of bytes read from the user buffer. This is nothing but confusing, so change the read functions to count how many bytes they have read, and the write functions to count how many bytes they have written. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 48f01323c37c1..4d36c01727ad8 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -807,7 +807,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -839,11 +839,11 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, written); + IOMAP_DIO_PARTIAL, read); to->nofault = false; pagefault_enable(); if (ret > 0) - written = ret; + read = ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { size_t leftover; @@ -863,7 +863,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, gfs2_holder_uninit(gh); if (ret < 0) return ret; - return written; + return read; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -873,7 +873,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -906,13 +906,13 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, read); + IOMAP_DIO_PARTIAL, written); from->nofault = false; if (ret == -ENOTBLK) ret = 0; if (ret > 0) - read = ret; + written = ret; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { size_t leftover; @@ -933,7 +933,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, gfs2_holder_uninit(gh); if (ret < 0) return ret; - return read; + return written; } static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -941,7 +941,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct gfs2_inode *ip; struct gfs2_holder gh; size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -962,7 +962,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (ret >= 0) { if (!iov_iter_count(to)) return ret; - written = ret; + read = ret; } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; @@ -980,7 +980,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = generic_file_read_iter(iocb, to); pagefault_enable(); if (ret > 0) - written += ret; + read += ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { size_t leftover; @@ -998,7 +998,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - return written ? written : ret; + return read ? read : ret; } static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, @@ -1012,7 +1012,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, struct gfs2_holder *statfs_gh = NULL; size_t prev_count = 0, window_size = 0; size_t orig_count = iov_iter_count(from); - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -1050,13 +1050,13 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, current->backing_dev_info = NULL; if (ret > 0) { iocb->ki_pos += ret; - read += ret; + written += ret; } if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); - from->count = orig_count - read; + from->count = orig_count - written; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { size_t leftover; @@ -1077,8 +1077,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); - from->count = orig_count - read; - return read ? read : ret; + from->count = orig_count - written; + return written ? written : ret; } /** From 6d22ff471070e21e24667be70764ee5abdfe5608 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 12:37:49 +0200 Subject: [PATCH 376/491] gfs2: Clean up use of fault_in_iov_iter_{read,write}able No need to store the return value of the fault_in functions in separate variables. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4d36c01727ad8..acc0c1d415647 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -846,12 +846,10 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, read = ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_writeable(to, window_size); + window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; @@ -915,12 +913,10 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, written = ret; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); + window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; @@ -983,12 +979,10 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) read += ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(&gh); - leftover = fault_in_iov_iter_writeable(to, window_size); + window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(&gh)) goto retry_under_glock; goto retry; @@ -1058,13 +1052,11 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, from->count = orig_count - written; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); + window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { - from->count = min(from->count, window_size - leftover); + if (window_size) { + from->count = min(from->count, window_size); if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; From 72382264502d9348ead372f82ecc3044de5c82d2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 12:53:26 +0200 Subject: [PATCH 377/491] gfs2: Pull return value test out of should_fault_in_pages Pull the return value test of the previous read or write operation out of should_fault_in_pages(). In a following patch, we'll fault in pages before the I/O and there will be no return value to check. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index acc0c1d415647..ea87bef7314d0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -770,7 +770,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } -static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, +static inline bool should_fault_in_pages(struct iov_iter *i, size_t *prev_count, size_t *window_size) { @@ -779,8 +779,6 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, if (likely(!count)) return false; - if (ret <= 0 && ret != -EFAULT) - return false; if (!iter_is_iovec(i)) return false; @@ -842,10 +840,12 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, IOMAP_DIO_PARTIAL, read); to->nofault = false; pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; if (ret > 0) read = ret; - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); @@ -855,6 +855,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, goto retry; } } +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: @@ -899,20 +900,23 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, goto out_uninit; /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) - goto out; + goto out_unlock; retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, IOMAP_DIO_PARTIAL, written); from->nofault = false; - - if (ret == -ENOTBLK) - ret = 0; + if (ret <= 0) { + if (ret == -ENOTBLK) + ret = 0; + if (ret != -EFAULT) + goto out_unlock; + } if (ret > 0) written = ret; - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); @@ -922,7 +926,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, goto retry; } } -out: +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: @@ -975,10 +979,12 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) pagefault_disable(); ret = generic_file_read_iter(iocb, to); pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; if (ret > 0) read += ret; - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, &prev_count, &window_size)) { gfs2_holder_allow_demote(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); @@ -988,6 +994,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) goto retry; } } +out_unlock: if (gfs2_holder_queued(&gh)) gfs2_glock_dq(&gh); out_uninit: @@ -1050,8 +1057,11 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; + from->count = orig_count - written; - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); From 324d116c5a5c8204dc00e63f725a3c5ed09afb53 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 13:32:23 +0200 Subject: [PATCH 378/491] gfs2: Align read and write chunks to the page cache Align the chunks that reads and writes are carried out in to the page cache rather than the user buffers. This will be more efficient in general, especially for allocating writes. Optimizing the case that the user buffer is gfs2 backed isn't very useful; we only need to make sure we won't deadlock. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ea87bef7314d0..11c46407d4a87 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -771,6 +771,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, } static inline bool should_fault_in_pages(struct iov_iter *i, + struct kiocb *iocb, size_t *prev_count, size_t *window_size) { @@ -783,15 +784,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i, return false; size = PAGE_SIZE; - offs = offset_in_page(i->iov[0].iov_base + i->iov_offset); + offs = offset_in_page(iocb->ki_pos); if (*prev_count != count || !*window_size) { size_t nr_dirtied; - size = ALIGN(offs + count, PAGE_SIZE); - size = min_t(size_t, size, SZ_1M); nr_dirtied = max(current->nr_dirtied_pause - current->nr_dirtied, 8); - size = min(size, nr_dirtied << PAGE_SHIFT); + size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT); } *prev_count = count; @@ -845,7 +844,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, if (ret > 0) read = ret; - if (should_fault_in_pages(to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); @@ -916,7 +915,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, if (ret > 0) written = ret; - if (should_fault_in_pages(from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); @@ -984,7 +983,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (ret > 0) read += ret; - if (should_fault_in_pages(to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); @@ -1061,7 +1060,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); From fa5dfa645d85910d747f4e0c97f19e5e97d1c270 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2022 23:37:30 +0200 Subject: [PATCH 379/491] gfs2: buffered write prefaulting In gfs2_file_buffered_write, to increase the likelihood that all the user memory we're trying to write will be resident in memory, carry out the write in chunks and fault in each chunk of user memory before trying to write it. Otherwise, some workloads will trigger frequent short "internal" writes, causing filesystem blocks to be allocated and then partially deallocated again when writing into holes, which is wasteful and breaks reservations. Neither the chunked writes nor any of the short "internal" writes are user visible. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 11c46407d4a87..5eda1bcc85e37 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -778,7 +778,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i, size_t count = iov_iter_count(i); size_t size, offs; - if (likely(!count)) + if (!count) return false; if (!iter_is_iovec(i)) return false; @@ -1033,7 +1033,20 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { retry_under_glock: + gfs2_holder_allow_demote(gh); + window_size -= fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (!window_size) { + ret = -EFAULT; + goto out_unlock; + } + if (!gfs2_holder_queued(gh)) + goto retry; + from->count = min(from->count, window_size); + } + if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -1060,17 +1073,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); - window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - from->count = min(from->count, window_size); - if (gfs2_holder_queued(gh)) - goto retry_under_glock; - goto retry; - } - } + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) + goto retry_under_glock; out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); From e1fa9ea85ce89207d2ac0316da35a4a7736801f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2022 18:27:12 +0200 Subject: [PATCH 380/491] gfs2: Stop using glock holder auto-demotion for now We're having unresolved issues with the glock holder auto-demotion mechanism introduced in commit dc732906c245. This mechanism was assumed to be essential for avoiding frequent short reads and writes until commit 296abc0d91d8 ("gfs2: No short reads or writes upon glock contention"). Since then, when the inode glock is lost, it is simply re-acquired and the operation is resumed. This means that apart from the performance penalty, we might as well drop the inode glock before faulting in pages, and re-acquire it afterwards. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 46 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 5eda1bcc85e37..2556ae1f92ea2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -832,7 +832,6 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, @@ -845,14 +844,10 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, read = ret; if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); + gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - if (gfs2_holder_queued(gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(gh)) @@ -900,7 +895,6 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) goto out_unlock; -retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, @@ -916,14 +910,10 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, written = ret; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); + gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - if (gfs2_holder_queued(gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(gh)) @@ -974,7 +964,6 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); ret = generic_file_read_iter(iocb, to); pagefault_enable(); @@ -984,14 +973,10 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) read += ret; if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(&gh); + gfs2_glock_dq(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(&gh); - if (window_size) { - if (gfs2_holder_queued(&gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(&gh)) @@ -1030,22 +1015,17 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); retry: - ret = gfs2_glock_nq(gh); - if (ret) - goto out_uninit; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { -retry_under_glock: - gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); if (!window_size) { ret = -EFAULT; - goto out_unlock; + goto out_uninit; } - if (!gfs2_holder_queued(gh)) - goto retry; from->count = min(from->count, window_size); } + ret = gfs2_glock_nq(gh); + if (ret) + goto out_uninit; if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -1073,8 +1053,10 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) - goto retry_under_glock; + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(gh); + goto retry; + } out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); From 725f22a1477c9c15aa67ad3af96fe28ec4fe72d2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 May 2022 10:13:07 -0700 Subject: [PATCH 381/491] block/mq-deadline: Set the fifo_time member also if inserting at head Before commit 322cff70d46c the fifo_time member of requests on a dispatch list was not used. Commit 322cff70d46c introduces code that reads the fifo_time member of requests on dispatch lists. Hence this patch that sets the fifo_time member when adding a request to a dispatch list. Cc: Christoph Hellwig Cc: Ming Lei Cc: Damien Le Moal Fixes: 322cff70d46c ("block/mq-deadline: Prioritize high-priority requests") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220513171307.32564-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/mq-deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 3ed5eaf3446a2..6ed602b2f80a5 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, if (at_head) { list_add(&rq->queuelist, &per_prio->dispatch); + rq->fifo_time = jiffies; } else { deadline_add_rq_rb(per_prio, rq); From fa8785e5931367e2b43f2c507f26bcf3e281c0ca Mon Sep 17 00:00:00 2001 From: IotaHydrae Date: Wed, 4 May 2022 19:59:04 +0800 Subject: [PATCH 382/491] pinctrl: sunxi: fix f1c100s uart2 function Change suniv f1c100s pinctrl,PD14 multiplexing function lvds1 to uart2 When the pin PD13 and PD14 is setting up to uart2 function in dts, there's an error occurred: 1c20800.pinctrl: unsupported function uart2 on pin PD14 Because 'uart2' is not any one multiplexing option of PD14, and pinctrl don't know how to configure it. So change the pin PD14 lvds1 function to uart2. Signed-off-by: IotaHydrae Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/tencent_70C1308DDA794C81CAEF389049055BACEC09@qq.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c index 2801ca7062732..68a5b627fb9b2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c +++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c @@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "lcd"), /* D20 */ - SUNXI_FUNCTION(0x3, "lvds1"), /* RX */ + SUNXI_FUNCTION(0x3, "uart2"), /* RX */ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15), SUNXI_FUNCTION(0x0, "gpio_in"), From e199975b775a37750903025915f7bc0ccda829e5 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 4 May 2022 18:07:36 +0100 Subject: [PATCH 383/491] pinctrl: sunxi: f1c100s: Fix signal name comment for PA2 SPI pin The manual describes function 0x6 of pin PA2 as "SPI1_CLK", so change the comment to reflect that. Signed-off-by: Andre Przywara Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220504170736.2669595-1-andre.przywara@arm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c index 68a5b627fb9b2..b8fc88a23cf4b 100644 --- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c +++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c @@ -51,7 +51,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = { SUNXI_FUNCTION(0x3, "pwm0"), /* PWM0 */ SUNXI_FUNCTION(0x4, "i2s"), /* IN */ SUNXI_FUNCTION(0x5, "uart1"), /* RX */ - SUNXI_FUNCTION(0x6, "spi1")), /* MOSI */ + SUNXI_FUNCTION(0x6, "spi1")), /* CLK */ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 3), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), From 95d686517884a403412b000361cee2b08b2ed1e6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 12 May 2022 16:26:41 -0700 Subject: [PATCH 384/491] mptcp: fix subflow accounting on close If the PM closes a fully established MPJ subflow or the subflow creation errors out in it's early stage the subflows counter is not bumped accordingly. This change adds the missing accounting, additionally taking care of updating accordingly the 'accept_subflow' flag. Fixes: a88c9e496937 ("mptcp: do not block subflows creation on errors") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 5 ++--- net/mptcp/protocol.h | 14 ++++++++++++++ net/mptcp/subflow.c | 12 +++++++++--- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 01809eef29b4b..aa51b100e0335 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -178,14 +178,13 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, struct mptcp_pm_data *pm = &msk->pm; bool update_subflows; - update_subflows = (ssk->sk_state == TCP_CLOSE) && - (subflow->request_join || subflow->mp_join); + update_subflows = subflow->request_join || subflow->mp_join; if (!READ_ONCE(pm->work_pending) && !update_subflows) return; spin_lock_bh(&pm->lock); if (update_subflows) - pm->subflows--; + __mptcp_pm_close_subflow(msk); /* Even if this subflow is not really established, tell the PM to try * to pick the next ones, if possible. diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3c1a3036550f8..f4ce28bb0fdcc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -833,6 +833,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk); +/* called under PM lock */ +static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk) +{ + if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk)) + WRITE_ONCE(msk->pm.accept_subflow, true); +} + +static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) +{ + spin_lock_bh(&msk->pm.lock); + __mptcp_pm_close_subflow(msk); + spin_unlock_bh(&msk->pm.lock); +} + void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index aba260f547daa..8c37087f0d846 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1422,20 +1422,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, struct sockaddr_storage addr; int remote_id = remote->id; int local_id = loc->id; + int err = -ENOTCONN; struct socket *sf; struct sock *ssk; u32 remote_token; int addrlen; int ifindex; u8 flags; - int err; if (!mptcp_is_fully_established(sk)) - return -ENOTCONN; + goto err_out; err = mptcp_subflow_create_socket(sk, &sf); if (err) - return err; + goto err_out; ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); @@ -1492,6 +1492,12 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, failed: subflow->disposable = 1; sock_release(sf); + +err_out: + /* we account subflows before the creation, and this failures will not + * be caught by sk_state_change() + */ + mptcp_pm_close_subflow(msk); return err; } From e274f71540082b015568eaf0b08ec70ac08dc4ad Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 12 May 2022 16:26:42 -0700 Subject: [PATCH 385/491] selftests: mptcp: add subflow limits test-cases Add and delete a bunch of endpoints and verify the respect of configured limits. This covers the codepath introduced by the previous patch. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7314257d248a7..48ef112f42c2e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1444,6 +1444,33 @@ chk_prio_nr() [ "${dump_stats}" = 1 ] && dump_stats } +chk_subflow_nr() +{ + local need_title="$1" + local msg="$2" + local subflow_nr=$3 + local cnt1 + local cnt2 + + if [ -n "${need_title}" ]; then + printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" + else + printf "%-${nr_blank}s %s" " " "${msg}" + fi + + cnt1=$(ss -N $ns1 -tOni | grep -c token) + cnt2=$(ss -N $ns2 -tOni | grep -c token) + if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then + echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr" + fail_test + dump_stats=1 + else + echo "[ ok ]" + fi + + [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint ) +} + chk_link_usage() { local ns=$1 @@ -2556,7 +2583,7 @@ fastclose_tests() fi } -implicit_tests() +endpoint_tests() { # userspace pm type prevents add_addr if reset "implicit EP"; then @@ -2578,6 +2605,23 @@ implicit_tests() $ns2 10.0.2.2 id 1 flags signal wait fi + + if reset "delete and re-add"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow & + + wait_mpj $ns2 + pm_nl_del_endpoint $ns2 2 10.0.2.2 + sleep 0.5 + chk_subflow_nr needtitle "after delete" 1 + + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + wait_mpj $ns2 + chk_subflow_nr "" "after re-add" 2 + wait + fi } # [$1: error message] @@ -2624,7 +2668,7 @@ all_tests_sorted=( d@deny_join_id0_tests m@fullmesh_tests z@fastclose_tests - I@implicit_tests + I@endpoint_tests ) all_tests_args="" From 9500acc631dbb8b73166e25700e656b11f6007b6 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 12 May 2022 22:49:00 +0530 Subject: [PATCH 386/491] net: macb: Increment rx bd head after allocating skb and buffer In gem_rx_refill rx_prepared_head is incremented at the beginning of the while loop preparing the skb and data buffers. If the skb or data buffer allocation fails, this BD will be unusable BDs until the head loops back to the same BD (and obviously buffer allocation succeeds). In the unlikely event that there's a string of allocation failures, there will be an equal number of unusable BDs and an inconsistent RX BD chain. Hence increment the head at the end of the while loop to be clean. Fixes: 4df95131ea80 ("net/macb: change RX path for GEM") Signed-off-by: Harini Katakam Signed-off-by: Michal Simek Signed-off-by: Radhey Shyam Pandey Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220512171900.32593-1-harini.katakam@xilinx.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index e475be29845c6..61284baa0496e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1219,7 +1219,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); - queue->rx_prepared_head++; desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { @@ -1258,6 +1257,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } + queue->rx_prepared_head++; } /* Make descriptor updates visible to hardware */ From 14ea4a470494528c7e88da5c4116c24eb027059f Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 12 May 2022 15:41:43 +0300 Subject: [PATCH 387/491] Revert "can: m_can: pci: use custom bit timings for Elkhart Lake" This reverts commit 0e8ffdf3b86dfd44b651f91b12fcae76c25c453b. Commit 0e8ffdf3b86d ("can: m_can: pci: use custom bit timings for Elkhart Lake") broke the test case using bitrate switching. | ip link set can0 up type can bitrate 500000 dbitrate 4000000 fd on | ip link set can1 up type can bitrate 500000 dbitrate 4000000 fd on | candump can0 & | cangen can1 -I 0x800 -L 64 -e -fb \ | -D 11223344deadbeef55667788feedf00daabbccdd44332211 -n 1 -v -v Above commit does everything correctly according to the datasheet. However datasheet wasn't correct. I got confirmation from hardware engineers that the actual CAN hardware on Intel Elkhart Lake is based on M_CAN version v3.2.0. Datasheet was mirroring values from an another specification which was based on earlier M_CAN version leading to wrong bit timings. Therefore revert the commit and switch back to common bit timings. Fixes: ea4c1787685d ("can: m_can: pci: use custom bit timings for Elkhart Lake") Link: https://lore.kernel.org/all/20220512124144.536850-1-jarkko.nikula@linux.intel.com Signed-off-by: Jarkko Nikula Reported-by: Chee Hou Ong Reported-by: Aman Kumar Reported-by: Pallavi Kumari Cc: # v5.16+ Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 48 +++---------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index b56a54d6c5a9c..8f184a852a0a7 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,14 +18,9 @@ #define M_CAN_PCI_MMIO_BAR 0 +#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 -struct m_can_pci_config { - const struct can_bittiming_const *bit_timing; - const struct can_bittiming_const *data_timing; - unsigned int clock_freq; -}; - struct m_can_pci_priv { struct m_can_classdev cdev; @@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = { .read_fifo = iomap_read_fifo, }; -static const struct can_bittiming_const m_can_bittiming_const_ehl = { - .name = KBUILD_MODNAME, - .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ - .tseg1_max = 64, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ - .tseg2_max = 128, - .sjw_max = 128, - .brp_min = 1, - .brp_max = 512, - .brp_inc = 1, -}; - -static const struct can_bittiming_const m_can_data_bittiming_const_ehl = { - .name = KBUILD_MODNAME, - .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ - .tseg1_max = 16, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - -static const struct m_can_pci_config m_can_pci_ehl = { - .bit_timing = &m_can_bittiming_const_ehl, - .data_timing = &m_can_data_bittiming_const_ehl, - .clock_freq = 200000000, -}; - static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) { struct device *dev = &pci->dev; - const struct m_can_pci_config *cfg; struct m_can_classdev *mcan_class; struct m_can_pci_priv *priv; void __iomem *base; @@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (!mcan_class) return -ENOMEM; - cfg = (const struct m_can_pci_config *)id->driver_data; - priv = cdev_to_priv(mcan_class); priv->base = base; @@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); mcan_class->pm_clock_support = 1; - mcan_class->bit_timing = cfg->bit_timing; - mcan_class->data_timing = cfg->data_timing; - mcan_class->can.clock.freq = cfg->clock_freq; + mcan_class->can.clock.freq = id->driver_data; mcan_class->ops = &m_can_pci_ops; pci_set_drvdata(pci, mcan_class); @@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops, m_can_pci_suspend, m_can_pci_resume); static const struct pci_device_id m_can_pci_id_table[] = { - { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, }, - { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, }, + { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, }, + { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, }, { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(pci, m_can_pci_id_table); From d6da7881020f9b37edb80ada12ce9b50b9232dc1 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 12 May 2022 15:41:44 +0300 Subject: [PATCH 388/491] can: m_can: remove support for custom bit timing, take #2 Now when Intel Elkhart Lake uses again common bit timing and there are no other users for custom bit timing, we can bring back the changes done by the commit 0ddd83fbebbc ("can: m_can: remove support for custom bit timing"). This effectively reverts commit ea768b2ffec6 ("Revert "can: m_can: remove support for custom bit timing"") while taking into account commit ea22ba40debe ("can: m_can: make custom bittiming fields const") and commit 7d4a101c0bd3 ("can: dev: add sanity check in can_set_static_ctrlmode()"). Link: https://lore.kernel.org/all/20220512124144.536850-2-jarkko.nikula@linux.intel.com Signed-off-by: Jarkko Nikula Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 24 ++++++------------------ drivers/net/can/m_can/m_can.h | 3 --- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index b3b5bc1c803b3..088bb1bcf1efb 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1495,34 +1495,22 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); if (err) return err; - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_30X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_30X; + cdev->can.bittiming_const = &m_can_bittiming_const_30X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X; break; case 31: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */ err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); if (err) return err; - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; break; case 32: case 33: /* Support both MCAN version v3.2.x and v3.3.0 */ - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index 2c5d409971686..d18b515e6ccc7 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -85,9 +85,6 @@ struct m_can_classdev { struct sk_buff *tx_skb; struct phy *transceiver; - const struct can_bittiming_const *bit_timing; - const struct can_bittiming_const *data_timing; - struct m_can_ops *ops; int version; From 5163373af195f10e0d99a8de3465c4ed36bdc337 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 May 2022 22:14:24 +0100 Subject: [PATCH 389/491] KVM: arm64: vgic-v3: Consistently populate ID_AA64PFR0_EL1.GIC When adding support for the slightly wonky Apple M1, we had to populate ID_AA64PFR0_EL1.GIC==1 to present something to the guest, as the HW itself doesn't advertise the feature. However, we gated this on the in-kernel irqchip being created. This causes some trouble for QEMU, which snapshots the state of the registers before creating a virtual GIC, and then tries to restore these registers once the GIC has been created. Obviously, between the two stages, ID_AA64PFR0_EL1.GIC has changed value, and the write fails. The fix is to actually emulate the HW, and always populate the field if the HW is capable of it. Fixes: 562e530fd770 ("KVM: arm64: Force ID_AA64PFR0_EL1.GIC=1 when exposing a virtual GICv3") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Reported-by: Peter Maydell Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20220503211424.3375263-1-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27f..adf408c09cdb8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); - if (irqchip_in_kernel(vcpu->kvm) && - vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + if (kvm_vgic_global_state.type == VGIC_V3) { val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); } From 2e40316753ee552fb598e8da8ca0d20a04e67453 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 13 May 2022 09:26:07 +0000 Subject: [PATCH 390/491] KVM: arm64: Don't hypercall before EL2 init Will reported the following splat when running with Protected KVM enabled: [ 2.427181] ------------[ cut here ]------------ [ 2.427668] WARNING: CPU: 3 PID: 1 at arch/arm64/kvm/mmu.c:489 __create_hyp_private_mapping+0x118/0x1ac [ 2.428424] Modules linked in: [ 2.429040] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.18.0-rc2-00084-g8635adc4efc7 #1 [ 2.429589] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2.430286] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.430734] pc : __create_hyp_private_mapping+0x118/0x1ac [ 2.431091] lr : create_hyp_exec_mappings+0x40/0x80 [ 2.431377] sp : ffff80000803baf0 [ 2.431597] x29: ffff80000803bb00 x28: 0000000000000000 x27: 0000000000000000 [ 2.432156] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [ 2.432561] x23: ffffcd96c343b000 x22: 0000000000000000 x21: ffff80000803bb40 [ 2.433004] x20: 0000000000000004 x19: 0000000000001800 x18: 0000000000000000 [ 2.433343] x17: 0003e68cf7efdd70 x16: 0000000000000004 x15: fffffc81f602a2c8 [ 2.434053] x14: ffffdf8380000000 x13: ffffcd9573200000 x12: ffffcd96c343b000 [ 2.434401] x11: 0000000000000004 x10: ffffcd96c1738000 x9 : 0000000000000004 [ 2.434812] x8 : ffff80000803bb40 x7 : 7f7f7f7f7f7f7f7f x6 : 544f422effff306b [ 2.435136] x5 : 000000008020001e x4 : ffff207d80a88c00 x3 : 0000000000000005 [ 2.435480] x2 : 0000000000001800 x1 : 000000014f4ab800 x0 : 000000000badca11 [ 2.436149] Call trace: [ 2.436600] __create_hyp_private_mapping+0x118/0x1ac [ 2.437576] create_hyp_exec_mappings+0x40/0x80 [ 2.438180] kvm_init_vector_slots+0x180/0x194 [ 2.458941] kvm_arch_init+0x80/0x274 [ 2.459220] kvm_init+0x48/0x354 [ 2.459416] arm_init+0x20/0x2c [ 2.459601] do_one_initcall+0xbc/0x238 [ 2.459809] do_initcall_level+0x94/0xb4 [ 2.460043] do_initcalls+0x54/0x94 [ 2.460228] do_basic_setup+0x1c/0x28 [ 2.460407] kernel_init_freeable+0x110/0x178 [ 2.460610] kernel_init+0x20/0x1a0 [ 2.460817] ret_from_fork+0x10/0x20 [ 2.461274] ---[ end trace 0000000000000000 ]--- Indeed, the Protected KVM mode promotes __create_hyp_private_mapping() to a hypercall as EL1 no longer has access to the hypervisor's stage-1 page-table. However, the call from kvm_init_vector_slots() happens after pKVM has been initialized on the primary CPU, but before it has been initialized on secondaries. As such, if the KVM initcall procedure is migrated from one CPU to another in this window, the hypercall may end up running on a CPU for which EL2 has not been initialized. Fortunately, the pKVM hypervisor doesn't rely on the host to re-map the vectors in the private range, so the hypercall in question is in fact superfluous. Skip it when pKVM is enabled. Reported-by: Will Deacon Signed-off-by: Quentin Perret [maz: simplified the checks slightly] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513092607.35233-1-qperret@google.com --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f6..a66d83540c15a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1436,7 +1436,8 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && + !is_protected_kvm_enabled()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) From 42226c989789d8da4af1de0c31070c96726d990c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 May 2022 18:08:58 -0700 Subject: [PATCH 391/491] Linux 5.18-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2284d1ca25039..5033c0577c6da 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Superb Owl # *DOCUMENTATION* From 54395a33718af1c04b5098203335b25382291a16 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 10 May 2022 17:08:47 -0700 Subject: [PATCH 392/491] drm/i915/dmc: Add MMIO range restrictions Bspec has added some steps that check forDMC MMIO range before programming them v2: Fix for CI v3: move register defines to .h (Anusha) - Check MMIO restrictions per pipe - Add MMIO restricton for v1 dmc header as well (Lucas) v4: s/_PICK/_PICK_EVEN and use it only for Pipe DMC scenario. - clean up sanity check logic.(Lucas) - Add MMIO range for RKL as well.(Anusha) v5: Use DISPLAY_VER instead of per platform check (Lucas) BSpec: 49193 Cc: stable@vger.kernel.org Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220511000847.1068302-1-anusha.srivatsa@intel.com (cherry picked from commit 21c47196aec3a93f913a7515e1e7b30e6c54d6c6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dmc.c | 44 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 16 +++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 7616a3906b9ec..1b774dcfb2819 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -367,6 +367,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc, } } +static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc, + const u32 *mmioaddr, u32 mmio_count, + int header_ver, u8 dmc_id) +{ + struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); + u32 start_range, end_range; + int i; + + if (dmc_id >= DMC_FW_MAX) { + drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id); + return false; + } + + if (header_ver == 1) { + start_range = DMC_MMIO_START_RANGE; + end_range = DMC_MMIO_END_RANGE; + } else if (dmc_id == DMC_FW_MAIN) { + start_range = TGL_MAIN_MMIO_START; + end_range = TGL_MAIN_MMIO_END; + } else if (DISPLAY_VER(i915) >= 13) { + start_range = ADLP_PIPE_MMIO_START; + end_range = ADLP_PIPE_MMIO_END; + } else if (DISPLAY_VER(i915) >= 12) { + start_range = TGL_PIPE_MMIO_START(dmc_id); + end_range = TGL_PIPE_MMIO_END(dmc_id); + } else { + drm_warn(&i915->drm, "Unknown mmio range for sanity check"); + return false; + } + + for (i = 0; i < mmio_count; i++) { + if (mmioaddr[i] < start_range || mmioaddr[i] > end_range) + return false; + } + + return true; +} + static u32 parse_dmc_fw_header(struct intel_dmc *dmc, const struct intel_dmc_header_base *dmc_header, size_t rem_size, u8 dmc_id) @@ -436,6 +474,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, return 0; } + if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count, + dmc_header->header_ver, dmc_id)) { + drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n"); + return 0; + } + for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a9354f8f110d8..fe960c2043621 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5501,6 +5501,22 @@ /* MMIO address range for DMC program (0x80000 - 0x82FFF) */ #define DMC_MMIO_START_RANGE 0x80000 #define DMC_MMIO_END_RANGE 0x8FFFF +#define DMC_V1_MMIO_START_RANGE 0x80000 +#define TGL_MAIN_MMIO_START 0x8F000 +#define TGL_MAIN_MMIO_END 0x8FFFF +#define _TGL_PIPEA_MMIO_START 0x92000 +#define _TGL_PIPEA_MMIO_END 0x93FFF +#define _TGL_PIPEB_MMIO_START 0x96000 +#define _TGL_PIPEB_MMIO_END 0x97FFF +#define ADLP_PIPE_MMIO_START 0x5F000 +#define ADLP_PIPE_MMIO_END 0x5FFFF + +#define TGL_PIPE_MMIO_START(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\ + _TGL_PIPEB_MMIO_START) + +#define TGL_PIPE_MMIO_END(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\ + _TGL_PIPEB_MMIO_END) + #define SKL_DMC_DC3_DC5_COUNT _MMIO(0x80030) #define SKL_DMC_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_DMC_DC3_DC5_COUNT _MMIO(0x80038) From 89e96d822bd51f7afe2d3e95a34099480b5c3d55 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 25 Apr 2022 17:30:45 -0700 Subject: [PATCH 393/491] i915/guc/reset: Make __guc_reset_context aware of guilty engines There are 2 ways an engine can get reset in i915 and the method of reset affects how KMD labels a context as guilty/innocent. (1) GuC initiated engine-reset: GuC resets a hung engine and notifies KMD. The context that hung on the engine is marked guilty and all other contexts are innocent. The innocent contexts are resubmitted. (2) GT based reset: When an engine heartbeat fails to tick, KMD initiates a gt/chip reset. All active contexts are marked as guilty and discarded. In order to correctly mark the contexts as guilty/innocent, pass a mask of engines that were reset to __guc_reset_context. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220426003045.3929439-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 303760aa914b7f5ac9602dbb4b471a2ad52eeb3e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++-------- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 82713264b96c1..b7c6d4462ec55 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -806,7 +806,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); - intel_uc_reset(>->uc, true); + intel_uc_reset(>->uc, ALL_ENGINES); intel_ggtt_restore_fences(gt->ggtt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index bf7079480d472..2488d1197f3e5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -438,7 +438,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc); void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); void intel_guc_submission_reset_prepare(struct intel_guc *guc); -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_cancel_requests(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1ce7e04aa837b..28f9aac0201dd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1590,9 +1590,9 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void __guc_reset_context(struct intel_context *ce, bool stalled) +static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) { - bool local_stalled; + bool guilty; struct i915_request *rq; unsigned long flags; u32 head; @@ -1620,7 +1620,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) if (!intel_context_is_pinned(ce)) goto next_context; - local_stalled = false; + guilty = false; rq = intel_context_find_active_request(ce); if (!rq) { head = ce->ring->tail; @@ -1628,14 +1628,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) } if (i915_request_started(rq)) - local_stalled = true; + guilty = stalled & ce->engine->mask; GEM_BUG_ON(i915_active_is_idle(&ce->active)); head = intel_ring_wrap(ce->ring, rq->head); - __i915_request_reset(rq, local_stalled && stalled); + __i915_request_reset(rq, guilty); out_replay: - guc_reset_state(ce, head, local_stalled && stalled); + guc_reset_state(ce, head, guilty); next_context: if (i != number_children) ce = list_next_entry(ce, parallel.child_link); @@ -1645,7 +1645,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) intel_context_put(parent); } -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) { struct intel_context *ce; unsigned long index; @@ -4013,7 +4013,7 @@ static void guc_context_replay(struct intel_context *ce) { struct i915_sched_engine *sched_engine = ce->engine->sched_engine; - __guc_reset_context(ce, true); + __guc_reset_context(ce, ce->engine->mask); tasklet_hi_schedule(&sched_engine->tasklet); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index da199aa6989fb..8eb34de2f20c0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -593,7 +593,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) __uc_sanitize(uc); } -void intel_uc_reset(struct intel_uc *uc, bool stalled) +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled) { struct intel_guc *guc = &uc->guc; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 866b462821c00..a8f38c2c60e23 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); -void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled); void intel_uc_reset_finish(struct intel_uc *uc); void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); From e6175a2ed1f18bf2f649625bf725e07adcfa6a28 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 13 May 2022 23:34:02 +0300 Subject: [PATCH 394/491] xfrm: fix "disable_policy" flag use when arriving from different devices In IPv4 setting the "disable_policy" flag on a device means no policy should be enforced for traffic originating from the device. This was implemented by seting the DST_NOPOLICY flag in the dst based on the originating device. However, dsts are cached in nexthops regardless of the originating devices, in which case, the DST_NOPOLICY flag value may be incorrect. Consider the following setup: +------------------------------+ | ROUTER | +-------------+ | +-----------------+ | | ipsec src |----|-|ipsec0 | | +-------------+ | |disable_policy=0 | +----+ | | +-----------------+ |eth1|-|----- +-------------+ | +-----------------+ +----+ | | noipsec src |----|-|eth0 | | +-------------+ | |disable_policy=1 | | | +-----------------+ | +------------------------------+ Where ROUTER has a default route towards eth1. dst entries for traffic arriving from eth0 would have DST_NOPOLICY and would be cached and therefore can be reused by traffic originating from ipsec0, skipping policy check. Fix by setting a IPSKB_NOPOLICY flag in IPCB and observing it instead of the DST in IN/FWD IPv4 policy checks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Shmulik Ladkani Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/ip.h | 1 + include/net/xfrm.h | 14 +++++++++++++- net/ipv4/route.c | 23 ++++++++++++++++++----- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 3984f2c39c4ba..0161137914cf9 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -56,6 +56,7 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) +#define IPSKB_NOPOLICY BIT(8) u16 frag_max_size; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6fb899ff5afce..d2efddce65d46 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1093,6 +1093,18 @@ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, return false; } +static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, + int dir, unsigned short family) +{ + if (dir != XFRM_POLICY_OUT && family == AF_INET) { + /* same dst may be used for traffic originating from + * devices with different policy settings. + */ + return IPCB(skb)->flags & IPSKB_NOPOLICY; + } + return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); +} + static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) @@ -1104,7 +1116,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, return __xfrm_policy_check(sk, ndir, skb, family); return __xfrm_check_nopolicy(net, skb, dir) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_check_dev_nopolicy(skb, dir, family) || __xfrm_policy_check(sk, ndir, skb, family); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98c6f34295931..fe5d14ef5c4d4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1726,6 +1726,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; struct rtable *rth; + bool no_policy; u32 itag = 0; int err; @@ -1736,8 +1737,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (our) flags |= RTCF_LOCAL; + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) return -ENOBUFS; @@ -1795,7 +1800,7 @@ static int __mkroute_input(struct sk_buff *skb, struct rtable *rth; int err; struct in_device *out_dev; - bool do_cache; + bool do_cache, no_policy; u32 itag = 0; /* get a working reference to the output device */ @@ -1840,6 +1845,10 @@ static int __mkroute_input(struct sk_buff *skb, } } + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) @@ -1852,8 +1861,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), + rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; @@ -2228,6 +2236,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable *rth; struct flowi4 fl4; bool do_cache = true; + bool no_policy; /* IP on this device is disabled. */ @@ -2346,6 +2355,10 @@ out: return err; RT_CACHE_STAT_INC(in_brd); local_input: + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + do_cache &= res->fi && !itag; if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); @@ -2360,7 +2373,7 @@ out: return err; rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) goto e_nobufs; From 024a7ad9eb4df626ca8c77fef4f67fd0ebd559d2 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Fri, 13 May 2022 20:16:45 +0800 Subject: [PATCH 395/491] ALSA: hda/realtek: fix right sounds and mute/micmute LEDs for HP machine The HP EliteBook 630 is using ALC236 codec which used 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20220513121648.28584-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ce2cb1986677b..ad292df7d805c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9091,6 +9091,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8995, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x89a4, "HP ProBook 440 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89a6, "HP ProBook 450 G9", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ac, "HP EliteBook 640 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ae, "HP EliteBook 650 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From 5c62383c06837b5719cd5447a5758b791279e653 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 May 2022 12:31:12 +0200 Subject: [PATCH 396/491] ALSA: usb-audio: Restore Rane SL-1 quirk At cleaning up and moving the device rename from the quirk table to its own table, we removed the entry for Rane SL-1 as we thought it's only for renaming. It turned out, however, that the quirk is required for matching with the device that declares itself as no standard audio but only as vendor-specific. Restore the quirk entry for Rane SL-1 to fix the regression. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215887 Fixes: 5436f59bc5bc ("ALSA: usb-audio: Move device rename and profile quirks to an internal table") Cc: Link: https://lore.kernel.org/r/20220516103112.12950-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 0ea39565e6232..40a5e3eb4ef26 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3235,6 +3235,15 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Rane SL-1 */ +{ + USB_DEVICE(0x13e5, 0x0001), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + } +}, + /* disabled due to regression for other devices; * see https://bugzilla.kernel.org/show_bug.cgi?id=199905 */ From 4d42d54a7d6aa6d29221d3fd4f2ae9503e94f011 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 May 2022 11:27:06 +0200 Subject: [PATCH 397/491] net/sched: act_pedit: sanitize shift argument before usage syzbot was able to trigger an Out-of-Bound on the pedit action: UBSAN: shift-out-of-bounds in net/sched/act_pedit.c:238:43 shift exponent 1400735974 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 3606 Comm: syz-executor151 Not tainted 5.18.0-rc5-syzkaller-00165-g810c2f0a3f86 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 ubsan_epilogue+0xb/0x50 lib/ubsan.c:151 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x187 lib/ubsan.c:322 tcf_pedit_init.cold+0x1a/0x1f net/sched/act_pedit.c:238 tcf_action_init_1+0x414/0x690 net/sched/act_api.c:1367 tcf_action_init+0x530/0x8d0 net/sched/act_api.c:1432 tcf_action_add+0xf9/0x480 net/sched/act_api.c:1956 tc_ctl_action+0x346/0x470 net/sched/act_api.c:2015 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5993 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fe36e9e1b59 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffef796fe88 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe36e9e1b59 RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003 RBP: 00007fe36e9a5d00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe36e9a5d90 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The 'shift' field is not validated, and any value above 31 will trigger out-of-bounds. The issue predates the git history, but syzbot was able to trigger it only after the commit mentioned in the fixes tag, and this change only applies on top of such commit. Address the issue bounding the 'shift' value to the maximum allowed by the relevant operator. Reported-and-tested-by: syzbot+8ed8fc4c57e9dcf23ca6@syzkaller.appspotmail.com Fixes: 8b796475fd78 ("net/sched: act_pedit: really ensure the skb is writable") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 0eaaf1f45de17..211c757bfc3c4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -232,6 +232,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, for (i = 0; i < p->tcfp_nkeys; ++i) { u32 cur = p->tcfp_keys[i].off; + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + /* The AT option can read a single byte, we can bound the actual * value with uchar max. */ From 396ef64113a8ba01c46315d67a99db8dde3eef51 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:13 +0200 Subject: [PATCH 398/491] netfilter: flowtable: fix excessive hw offload attempts after failure If a flow cannot be offloaded, the code currently repeatedly tries again as quickly as possible, which can significantly increase system load. Fix this by limiting flow timeout update and hardware offload retry to once per second. Fixes: c07531c01d82 ("netfilter: flowtable: Remove redundant hw refresh bit") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 3db256da919ba..20b4a14e5d4ea 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -335,8 +335,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (READ_ONCE(flow->timeout) != timeout) + if (timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); + else + return; if (likely(!nf_flowtable_hw_offload(flow_table))) return; From 45ca3e61999e9a30ca2b7cfbf9da8a9f8d13be31 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:14 +0200 Subject: [PATCH 399/491] netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices The dst entry does not contain a valid hardware address, so skip the lookup in order to avoid running into errors here. The proper hardware address is filled in from nft_dev_path_info Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 900d48c810a12..d88de26aad758 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route, route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); } +static bool nft_is_valid_ether_device(const struct net_device *dev) +{ + if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || + dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) + return false; + + return true; +} + static int nft_dev_fill_forward_path(const struct nf_flow_route *route, const struct dst_entry *dst_cache, const struct nf_conn *ct, @@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, struct neighbour *n; u8 nud_state; + if (!nft_is_valid_ether_device(dev)) + goto out; + n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -1; @@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, if (!(nud_state & NUD_VALID)) return -1; +out: return dev_fill_forward_path(dev, ha, stack); } @@ -78,15 +91,6 @@ struct nft_forward_info { enum flow_offload_xmit_type xmit_type; }; -static bool nft_is_valid_ether_device(const struct net_device *dev) -{ - if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || - dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) - return false; - - return true; -} - static void nft_dev_path_info(const struct net_device_path_stack *stack, struct nft_forward_info *info, unsigned char *ha, struct nf_flowtable *flowtable) From cf2df74e202d81b09f09d84c2d8903e0e87e9274 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:15 +0200 Subject: [PATCH 400/491] net: fix dev_fill_forward_path with pppoe + bridge When calling dev_fill_forward_path on a pppoe device, the provided destination address is invalid. In order for the bridge fdb lookup to succeed, the pppoe code needs to update ctx->daddr to the correct value. Fix this by storing the address inside struct net_device_path_ctx Fixes: f6efc675c9dd ("net: ppp: resolve forwarding path for bridge pppoe devices") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- drivers/net/ppp/pppoe.c | 1 + include/linux/netdevice.h | 2 +- net/core/dev.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 3619520340b74..e172743948ed7 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); + memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b1fbe21650bb5..f736c020cde27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -900,7 +900,7 @@ struct net_device_path_stack { struct net_device_path_ctx { const struct net_device *dev; - const u8 *daddr; + u8 daddr[ETH_ALEN]; int num_vlans; struct { diff --git a/net/core/dev.c b/net/core/dev.c index 1461c2d9dec80..2771fd22dc6ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, const struct net_device *last_dev; struct net_device_path_ctx ctx = { .dev = dev, - .daddr = daddr, }; struct net_device_path *path; int ret = 0; + memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); stack->num_paths = 0; while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { last_dev = ctx.dev; From 2456074935003b66c40f78df6adfc722435d43ea Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:16 +0200 Subject: [PATCH 401/491] netfilter: nft_flow_offload: fix offload with pppoe + vlan When running a combination of PPPoE on top of a VLAN, we need to set info->outdev to the PPPoE device, otherwise PPPoE encap is skipped during software offload. Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index d88de26aad758..187b8cb9a5103 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -123,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, info->indev = NULL; break; } - info->outdev = path->dev; + if (!info->outdev) + info->outdev = path->dev; info->encap[info->num_encaps].id = path->encap.id; info->encap[info->num_encaps].proto = path->encap.proto; info->num_encaps++; From da2172a9bfec858ceeb0271b9d444378490398c8 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 13 May 2022 15:52:31 -0700 Subject: [PATCH 402/491] ptp: ocp: have adjtime handle negative delta_ns correctly delta_ns is a s64, but it was being passed ptp_ocp_adjtime_coarse as an u64. Also, it turns out that timespec64_add_ns() only handles positive values, so perform the math with set_normalized_timespec(). Fixes: 90f8f4c0e3ce ("ptp: ocp: Add ptp_ocp_adjtime_coarse for large adjustments") Suggested-by: Vadim Fedorenko Signed-off-by: Jonathan Lemon Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20220513225231.1412-1-jonathan.lemon@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index dd45471f67808..36c0e188216b1 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -841,7 +841,7 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val) } static void -ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns) +ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, s64 delta_ns) { struct timespec64 ts; unsigned long flags; @@ -850,7 +850,8 @@ ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns) spin_lock_irqsave(&bp->lock, flags); err = __ptp_ocp_gettime_locked(bp, &ts, NULL); if (likely(!err)) { - timespec64_add_ns(&ts, delta_ns); + set_normalized_timespec64(&ts, ts.tv_sec, + ts.tv_nsec + delta_ns); __ptp_ocp_settime_locked(bp, &ts); } spin_unlock_irqrestore(&bp->lock, flags); From a3b69dd0ad6265c29c4b6fb381cd76fb3bebdf8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 15 May 2022 14:58:15 +0200 Subject: [PATCH 403/491] Revert "PCI: aardvark: Rewrite IRQ code to chained IRQ handler" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1571d67dc190e50c6c56e8f88cdc39f7cc53166e. This commit broke support for setting interrupt affinity. It looks like that it is related to the chained IRQ handler. Revert this commit until issue with setting interrupt affinity is fixed. Fixes: 1571d67dc190 ("PCI: aardvark: Rewrite IRQ code to chained IRQ handler") Link: https://lore.kernel.org/r/20220515125815.30157-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-aardvark.c | 48 ++++++++++++--------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 09d9bf465d727..ffec82c8a523f 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -272,7 +272,6 @@ struct advk_pcie { u32 actions; } wins[OB_WIN_COUNT]; u8 wins_count; - int irq; struct irq_domain *rp_irq_domain; struct irq_domain *irq_domain; struct irq_chip irq_chip; @@ -1570,26 +1569,21 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie) } } -static void advk_pcie_irq_handler(struct irq_desc *desc) +static irqreturn_t advk_pcie_irq_handler(int irq, void *arg) { - struct advk_pcie *pcie = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - u32 val, mask, status; + struct advk_pcie *pcie = arg; + u32 status; - chained_irq_enter(chip, desc); + status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG); + if (!(status & PCIE_IRQ_CORE_INT)) + return IRQ_NONE; - val = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG); - mask = advk_readl(pcie, HOST_CTRL_INT_MASK_REG); - status = val & ((~mask) & PCIE_IRQ_ALL_MASK); + advk_pcie_handle_int(pcie); - if (status & PCIE_IRQ_CORE_INT) { - advk_pcie_handle_int(pcie); + /* Clear interrupt */ + advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG); - /* Clear interrupt */ - advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG); - } - - chained_irq_exit(chip, desc); + return IRQ_HANDLED; } static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) @@ -1669,7 +1663,7 @@ static int advk_pcie_probe(struct platform_device *pdev) struct advk_pcie *pcie; struct pci_host_bridge *bridge; struct resource_entry *entry; - int ret; + int ret, irq; bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie)); if (!bridge) @@ -1755,9 +1749,17 @@ static int advk_pcie_probe(struct platform_device *pdev) if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); - pcie->irq = platform_get_irq(pdev, 0); - if (pcie->irq < 0) - return pcie->irq; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, advk_pcie_irq_handler, + IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie", + pcie); + if (ret) { + dev_err(dev, "Failed to register interrupt\n"); + return ret; + } pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node, "reset-gpios", 0, @@ -1814,15 +1816,12 @@ static int advk_pcie_probe(struct platform_device *pdev) return ret; } - irq_set_chained_handler_and_data(pcie->irq, advk_pcie_irq_handler, pcie); - bridge->sysdata = pcie; bridge->ops = &advk_pcie_ops; bridge->map_irq = advk_pcie_map_irq; ret = pci_host_probe(bridge); if (ret < 0) { - irq_set_chained_handler_and_data(pcie->irq, NULL, NULL); advk_pcie_remove_rp_irq_domain(pcie); advk_pcie_remove_msi_irq_domain(pcie); advk_pcie_remove_irq_domain(pcie); @@ -1871,9 +1870,6 @@ static int advk_pcie_remove(struct platform_device *pdev) advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG); advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG); - /* Remove IRQ handler */ - irq_set_chained_handler_and_data(pcie->irq, NULL, NULL); - /* Remove IRQ domains */ advk_pcie_remove_rp_irq_domain(pcie); advk_pcie_remove_msi_irq_domain(pcie); From 88110a9f6209be1ed7312bae029d000bc7c7e88f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 28 Apr 2022 20:30:10 +0200 Subject: [PATCH 404/491] clk: bcm2835: fix bcm2835_clock_choose_div The commit 09e3b18ca5de ("clk: bcm2835: Remove unused variable") accidentially breaks the behavior of bcm2835_clock_choose_div() and booting of Raspberry Pi. The removed do_div macro call had side effects, so we need to restore it. Fixes: 09e3b18ca5de ("clk: bcm2835: Remove unused variable") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220428183010.1635248-1-stefan.wahren@i2se.com Tested-by: Maxime Ripard Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 3ad20e75fd23f..48a1eb9f2d551 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -941,6 +941,7 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw, u64 temp = (u64)parent_rate << CM_DIV_FRAC_BITS; u32 div, mindiv, maxdiv; + do_div(temp, rate); div = temp; div &= ~unused_frac_mask; From 60d9f050da63b71392810af9b69ca8de6d23a17a Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 11 May 2022 22:02:06 +0200 Subject: [PATCH 405/491] Revert "clk: sunxi-ng: sun6i-rtc: Add support for H6" This reverts commit 1738890a3165ccd0da98ebd3e2d5f9b230d5afa8. Commit 1738890a3165 ("clk: sunxi-ng: sun6i-rtc: Add support for H6") breaks HDMI output on Tanix TX6 mini board. Exact reason isn't known, but because that commit doesn't actually improve anything, let's just revert it. Cc: stable@vger.kernel.org Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220511200206.2458274-1-jernej.skrabec@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/sunxi-ng/ccu-sun6i-rtc.c | 15 --------------- drivers/rtc/rtc-sun6i.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c index 2f3ddc908ebd3..d65398497d5f6 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c @@ -298,10 +298,6 @@ static const struct sunxi_ccu_desc sun6i_rtc_ccu_desc = { .hw_clks = &sun6i_rtc_ccu_hw_clks, }; -static const struct clk_parent_data sun50i_h6_osc32k_fanout_parents[] = { - { .hw = &osc32k_clk.common.hw }, -}; - static const struct clk_parent_data sun50i_h616_osc32k_fanout_parents[] = { { .hw = &osc32k_clk.common.hw }, { .fw_name = "pll-32k" }, @@ -314,13 +310,6 @@ static const struct clk_parent_data sun50i_r329_osc32k_fanout_parents[] = { { .hw = &osc24M_32k_clk.common.hw } }; -static const struct sun6i_rtc_match_data sun50i_h6_rtc_ccu_data = { - .have_ext_osc32k = true, - .have_iosc_calibration = true, - .osc32k_fanout_parents = sun50i_h6_osc32k_fanout_parents, - .osc32k_fanout_nparents = ARRAY_SIZE(sun50i_h6_osc32k_fanout_parents), -}; - static const struct sun6i_rtc_match_data sun50i_h616_rtc_ccu_data = { .have_iosc_calibration = true, .rtc_32k_single_parent = true, @@ -335,10 +324,6 @@ static const struct sun6i_rtc_match_data sun50i_r329_rtc_ccu_data = { }; static const struct of_device_id sun6i_rtc_ccu_match[] = { - { - .compatible = "allwinner,sun50i-h6-rtc", - .data = &sun50i_h6_rtc_ccu_data, - }, { .compatible = "allwinner,sun50i-h616-rtc", .data = &sun50i_h616_rtc_ccu_data, diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index 5b3e4da634061..5252ce4cbda4e 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -370,6 +370,23 @@ CLK_OF_DECLARE_DRIVER(sun8i_h3_rtc_clk, "allwinner,sun8i-h3-rtc", CLK_OF_DECLARE_DRIVER(sun50i_h5_rtc_clk, "allwinner,sun50i-h5-rtc", sun8i_h3_rtc_clk_init); +static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = { + .rc_osc_rate = 16000000, + .fixed_prescaler = 32, + .has_prescaler = 1, + .has_out_clk = 1, + .export_iosc = 1, + .has_losc_en = 1, + .has_auto_swt = 1, +}; + +static void __init sun50i_h6_rtc_clk_init(struct device_node *node) +{ + sun6i_rtc_clk_init(node, &sun50i_h6_rtc_data); +} +CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc", + sun50i_h6_rtc_clk_init); + /* * The R40 user manual is self-conflicting on whether the prescaler is * fixed or configurable. The clock diagram shows it as fixed, but there From af8ca6eaa9b24a90484218e356f959a94bff22fa Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 13 May 2022 20:00:30 +0200 Subject: [PATCH 406/491] net: lan966x: Fix assignment of the MAC address The following two scenarios were failing for lan966x. 1. If the port had the address X and then trying to assign the same address, then the HW was just removing this address because first it tries to learn new address and then delete the old one. As they are the same the HW remove it. 2. If the port eth0 was assigned the same address as one of the other ports eth1 then when assigning back the address to eth0 then the HW was deleting the address of eth1. The case 1. is fixed by checking if the port has already the same address while case 2. is fixed by checking if the address is used by any other port. Fixes: e18aba8941b40b ("net: lan966x: add mactable support") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20220513180030.3076793-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- .../ethernet/microchip/lan966x/lan966x_main.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 95830e3e2b1fb..05f6dcc9dfd52 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -103,6 +103,24 @@ static int lan966x_create_targets(struct platform_device *pdev, return 0; } +static bool lan966x_port_unique_address(struct net_device *dev) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + int p; + + for (p = 0; p < lan966x->num_phys_ports; ++p) { + port = lan966x->ports[p]; + if (!port || port->dev == dev) + continue; + + if (ether_addr_equal(dev->dev_addr, port->dev->dev_addr)) + return false; + } + + return true; +} + static int lan966x_port_set_mac_address(struct net_device *dev, void *p) { struct lan966x_port *port = netdev_priv(dev); @@ -110,16 +128,26 @@ static int lan966x_port_set_mac_address(struct net_device *dev, void *p) const struct sockaddr *addr = p; int ret; + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; + /* Learn the new net device MAC address in the mac table. */ ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, HOST_PVID); if (ret) return ret; + /* If there is another port with the same address as the dev, then don't + * delete it from the MAC table + */ + if (!lan966x_port_unique_address(dev)) + goto out; + /* Then forget the previous one. */ ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, HOST_PVID); if (ret) return ret; +out: eth_hw_addr_set(dev, addr->sa_data); return ret; } From ef6b1cd11962aec21c58d137006ab122dbc8d6fd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 May 2022 19:01:56 +0200 Subject: [PATCH 407/491] net: systemport: Fix an error handling path in bcm_sysport_probe() if devm_clk_get_optional() fails, we still need to go through the error handling path. Add the missing goto. Fixes: 6328a126896ea ("net: systemport: Manage Wake-on-LAN clock") Signed-off-by: Christophe JAILLET Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/99d70634a81c229885ae9e4ee69b2035749f7edc.1652634040.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 60dde29974bfe..df51be3cbe069 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2585,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, 1); priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol"); - if (IS_ERR(priv->wol_clk)) - return PTR_ERR(priv->wol_clk); + if (IS_ERR(priv->wol_clk)) { + ret = PTR_ERR(priv->wol_clk); + goto err_deregister_fixed_link; + } /* Set the needed headroom once and for all */ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); From 4d33ab08c0af140752a46f227a6bf97dab1e17b4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 May 2022 21:37:27 -0400 Subject: [PATCH 408/491] xfrm: set dst dev to blackhole_netdev instead of loopback_dev in ifdown The global blackhole_netdev has replaced pernet loopback_dev to become the one given to the object that holds an netdev when ifdown in many places of ipv4 and ipv6 since commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries"). Especially after commit faab39f63c1f ("net: allow out-of-order netdev unregistration"), it's no longer safe to use loopback_dev that may be freed before other netdev. This patch is to set dst dev to blackhole_netdev instead of loopback_dev in ifdown. v1->v2: - add Fixes tag as Eric suggested. Fixes: faab39f63c1f ("net: allow out-of-order netdev unregistration") Signed-off-by: Xin Long Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/e8c87482998ca6fcdab214f5a9d582899ec0c648.1652665047.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 00bd0ecff5a1b..f1876ea61fdce 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3744,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { - dst->dev = dev_net(dev)->loopback_dev; + dst->dev = blackhole_netdev; dev_hold(dst->dev); dev_put(dev); } From 9e7fef9521e73ca8afd7da9e58c14654b02dfad8 Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:06:56 +0800 Subject: [PATCH 409/491] net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf() In vmxnet3_rq_alloc_rx_buf(), when dma_map_single() fails, rbi->skb is freed immediately. Similarly, in another branch, when dma_map_page() fails, rbi->page is also freed. In the two cases, vmxnet3_rq_alloc_rx_buf() returns an error to its callers vmxnet3_rq_init() -> vmxnet3_rq_init_all() -> vmxnet3_activate_dev(). Then vmxnet3_activate_dev() calls vmxnet3_rq_cleanup_all() in error handling code, and rbi->skb or rbi->page are freed again in vmxnet3_rq_cleanup_all(), causing use-after-free bugs. To fix these possible bugs, rbi->skb and rbi->page should be cleared after they are freed. The error log in our fault-injection testing is shown as follows: [ 14.319016] BUG: KASAN: use-after-free in consume_skb+0x2f/0x150 ... [ 14.321586] Call Trace: ... [ 14.325357] consume_skb+0x2f/0x150 [ 14.325671] vmxnet3_rq_cleanup_all+0x33a/0x4e0 [vmxnet3] [ 14.326150] vmxnet3_activate_dev+0xb9d/0x2ca0 [vmxnet3] [ 14.326616] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.361675] Allocated by task 351: ... [ 14.362688] __netdev_alloc_skb+0x1b3/0x6f0 [ 14.362960] vmxnet3_rq_alloc_rx_buf+0x1b0/0x8d0 [vmxnet3] [ 14.363317] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.363661] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.367309] [ 14.367412] Freed by task 351: ... [ 14.368932] __dev_kfree_skb_any+0xd2/0xe0 [ 14.369193] vmxnet3_rq_alloc_rx_buf+0x71e/0x8d0 [vmxnet3] [ 14.369544] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.369883] vmxnet3_open+0x387/0x470 [vmxnet3] [ 14.370174] __dev_open+0x28a/0x420 [ 14.370399] __dev_change_flags+0x192/0x590 [ 14.370667] dev_change_flags+0x7a/0x180 [ 14.370919] do_setlink+0xb28/0x3570 [ 14.371150] rtnl_newlink+0x1160/0x1740 [ 14.371399] rtnetlink_rcv_msg+0x5bf/0xa50 [ 14.371661] netlink_rcv_skb+0x1cd/0x3e0 [ 14.371913] netlink_unicast+0x5dc/0x840 [ 14.372169] netlink_sendmsg+0x856/0xc40 [ 14.372420] ____sys_sendmsg+0x8a7/0x8d0 [ 14.372673] __sys_sendmsg+0x1c2/0x270 [ 14.372914] do_syscall_64+0x41/0x90 [ 14.373145] entry_SYSCALL_64_after_hwframe+0x44/0xae ... Fixes: 5738a09d58d5a ("vmxnet3: fix checks for dma mapping errors") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050656.2636588-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d9d90baac72a2..1154f18842128 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } From edf410cb74dc612fd47ef5be319c5a0bcd6e6ccd Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:07:11 +0800 Subject: [PATCH 410/491] net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup() In vmxnet3_rq_create(), when dma_alloc_coherent() fails, vmxnet3_rq_destroy() is called. It sets rq->rx_ring[i].base to NULL. Then vmxnet3_rq_create() returns an error to its callers mxnet3_rq_create_all() -> vmxnet3_change_mtu(). Then vmxnet3_change_mtu() calls vmxnet3_force_close() -> dev_close() in error handling code. And the driver calls vmxnet3_close() -> vmxnet3_quiesce_dev() -> vmxnet3_rq_cleanup_all() -> vmxnet3_rq_cleanup(). In vmxnet3_rq_cleanup(), rq->rx_ring[ring_idx].base is accessed, but this variable is NULL, causing a NULL pointer dereference. To fix this possible bug, an if statement is added to check whether rq->rx_ring[0].base is NULL in vmxnet3_rq_cleanup() and exit early if so. The error log in our fault-injection testing is shown as follows: [ 65.220135] BUG: kernel NULL pointer dereference, address: 0000000000000008 ... [ 65.222633] RIP: 0010:vmxnet3_rq_cleanup_all+0x396/0x4e0 [vmxnet3] ... [ 65.227977] Call Trace: ... [ 65.228262] vmxnet3_quiesce_dev+0x80f/0x8a0 [vmxnet3] [ 65.228580] vmxnet3_close+0x2c4/0x3f0 [vmxnet3] [ 65.228866] __dev_close_many+0x288/0x350 [ 65.229607] dev_close_many+0xa4/0x480 [ 65.231124] dev_close+0x138/0x230 [ 65.231933] vmxnet3_force_close+0x1f0/0x240 [vmxnet3] [ 65.232248] vmxnet3_change_mtu+0x75d/0x920 [vmxnet3] ... Fixes: d1a890fa37f27 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050711.2636709-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 1154f18842128..93e8d119d45f6 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1668,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD From 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 17 May 2022 09:27:08 +0200 Subject: [PATCH 411/491] dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The typedefs u32 and u64 are not available in userspace. Thus user get an error he try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B: $ gcc -Wall -c -MMD -c -o ioctls_list.o ioctls_list.c In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1, from /usr/include/linux/ioctl.h:5, from /usr/include/asm-generic/ioctls.h:5, from ioctls_list.c:11: ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function) 463 | { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function) 464 | { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ The issue was initially reported here[1]. [1]: https://github.com/jerome-pouiller/ioctl/pull/14 Signed-off-by: Jérôme Pouiller Reviewed-by: Christian König Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi") CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com Signed-off-by: Christian König --- include/uapi/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 8e4a2ca0bcbf7..b1523cb8ab307 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -92,7 +92,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif From 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef Mon Sep 17 00:00:00 2001 From: Prakruthi Deepak Heragu Date: Fri, 13 May 2022 10:46:54 -0700 Subject: [PATCH 412/491] arm64: paravirt: Use RCU read locks to guard stolen_time During hotplug, the stolen time data structure is unmapped and memset. There is a possibility of the timer IRQ being triggered before memset and stolen time is getting updated as part of this timer IRQ handler. This causes the below crash in timer handler - [ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148 ... [ 3458.154398][ C5] Call trace: [ 3458.157648][ C5] para_steal_clock+0x30/0x50 [ 3458.162319][ C5] irqtime_account_process_tick+0x30/0x194 [ 3458.168148][ C5] account_process_tick+0x3c/0x280 [ 3458.173274][ C5] update_process_times+0x5c/0xf4 [ 3458.178311][ C5] tick_sched_timer+0x180/0x384 [ 3458.183164][ C5] __run_hrtimer+0x160/0x57c [ 3458.187744][ C5] hrtimer_interrupt+0x258/0x684 [ 3458.192698][ C5] arch_timer_handler_virt+0x5c/0xa0 [ 3458.198002][ C5] handle_percpu_devid_irq+0xdc/0x414 [ 3458.203385][ C5] handle_domain_irq+0xa8/0x168 [ 3458.208241][ C5] gic_handle_irq.34493+0x54/0x244 [ 3458.213359][ C5] call_on_irq_stack+0x40/0x70 [ 3458.218125][ C5] do_interrupt_handler+0x60/0x9c [ 3458.223156][ C5] el1_interrupt+0x34/0x64 [ 3458.227560][ C5] el1h_64_irq_handler+0x1c/0x2c [ 3458.232503][ C5] el1h_64_irq+0x7c/0x80 [ 3458.236736][ C5] free_vmap_area_noflush+0x108/0x39c [ 3458.242126][ C5] remove_vm_area+0xbc/0x118 [ 3458.246714][ C5] vm_remove_mappings+0x48/0x2a4 [ 3458.251656][ C5] __vunmap+0x154/0x278 [ 3458.255796][ C5] stolen_time_cpu_down_prepare+0xc0/0xd8 [ 3458.261542][ C5] cpuhp_invoke_callback+0x248/0xc34 [ 3458.266842][ C5] cpuhp_thread_fun+0x1c4/0x248 [ 3458.271696][ C5] smpboot_thread_fn+0x1b0/0x400 [ 3458.276638][ C5] kthread+0x17c/0x1e0 [ 3458.280691][ C5] ret_from_fork+0x10/0x20 As a fix, introduce rcu lock to update stolen time structure. Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online") Cc: stable@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Prakruthi Deepak Heragu Signed-off-by: Elliot Berman Reviewed-by: Srivatsa S. Bhat (VMware) Link: https://lore.kernel.org/r/20220513174654.362169-1-quic_eberman@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/kernel/paravirt.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 75fed4460407d..57c7c211f8c71 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu) DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); struct pv_time_stolen_time_region { - struct pvclock_vcpu_stolen_time *kaddr; + struct pvclock_vcpu_stolen_time __rcu *kaddr; }; static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc); /* return stolen time in ns by asking the hypervisor */ static u64 para_steal_clock(int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; + u64 ret = 0; reg = per_cpu_ptr(&stolen_time_region, cpu); @@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu) * online notification callback runs. Until the callback * has run we just return zero. */ - if (!reg->kaddr) + rcu_read_lock(); + kaddr = rcu_dereference(reg->kaddr); + if (!kaddr) { + rcu_read_unlock(); return 0; + } - return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); + ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time)); + rcu_read_unlock(); + return ret; } static int stolen_time_cpu_down_prepare(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; reg = this_cpu_ptr(&stolen_time_region); if (!reg->kaddr) return 0; - memunmap(reg->kaddr); - memset(reg, 0, sizeof(*reg)); + kaddr = rcu_replace_pointer(reg->kaddr, NULL, true); + synchronize_rcu(); + memunmap(kaddr); return 0; } static int stolen_time_cpu_online(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; struct arm_smccc_res res; @@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu) if (res.a0 == SMCCC_RET_NOT_SUPPORTED) return -EINVAL; - reg->kaddr = memremap(res.a0, + kaddr = memremap(res.a0, sizeof(struct pvclock_vcpu_stolen_time), MEMREMAP_WB); + rcu_assign_pointer(reg->kaddr, kaddr); + if (!reg->kaddr) { pr_warn("Failed to map stolen time data structure\n"); return -ENOMEM; } - if (le32_to_cpu(reg->kaddr->revision) != 0 || - le32_to_cpu(reg->kaddr->attributes) != 0) { + if (le32_to_cpu(kaddr->revision) != 0 || + le32_to_cpu(kaddr->attributes) != 0) { pr_warn_once("Unexpected revision or attributes in stolen time data\n"); return -ENXIO; } From eb3d8ea3e1f03f4b0b72d8f5ed9eb7c3165862e8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 May 2022 17:07:35 +0100 Subject: [PATCH 413/491] arm64: kexec: load from kimage prior to clobbering In arm64_relocate_new_kernel() we load some fields out of the kimage structure after relocation has occurred. As the kimage structure isn't allocated to be relocation-safe, it may be clobbered during relocation, and we may load junk values out of the structure. Due to this, kexec may fail when the kimage allocation happens to fall within a PA range that an object will be relocated to. This has been observed to occur for regular kexec on a QEMU TCG 'virt' machine with 2GiB of RAM, where the PA range of the new kernel image overlaps the kimage structure. Avoid this by ensuring we load all values from the kimage structure prior to relocation. I've tested this atop v5.16 and v5.18-rc6. Fixes: 878fdbd70486 ("arm64: kexec: pass kimage as the only argument to relocation function") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Pasha Tatashin Cc: Will Deacon Reviewed-by: Pasha Tatashin Link: https://lore.kernel.org/r/20220516160735.731404-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/relocate_kernel.S | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index f0a3df9e18a32..413f899e4ac63 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -37,6 +37,15 @@ * safe memory that has been set up to be preserved during the copy operation. */ SYM_CODE_START(arm64_relocate_new_kernel) + /* + * The kimage structure isn't allocated specially and may be clobbered + * during relocation. We must load any values we need from it prior to + * any relocation occurring. + */ + ldr x28, [x0, #KIMAGE_START] + ldr x27, [x0, #KIMAGE_ARCH_EL2_VECTORS] + ldr x26, [x0, #KIMAGE_ARCH_DTB_MEM] + /* Setup the list loop variables. */ ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ @@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel) ic iallu dsb nsh isb - ldr x4, [x0, #KIMAGE_START] /* relocation start */ - ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ - ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ turn_off_mmu x12, x13 /* Start new image. */ - cbz x1, .Lel1 - mov x1, x4 /* relocation start */ - mov x2, x0 /* dtb address */ + cbz x27, .Lel1 + mov x1, x28 /* kernel entry point */ + mov x2, x26 /* dtb address */ mov x3, xzr mov x4, xzr mov x0, #HVC_SOFT_RESTART hvc #0 /* Jumps from el2 */ .Lel1: + mov x0, x26 /* dtb address */ + mov x1, xzr mov x2, xzr mov x3, xzr - br x4 /* Jumps from el1 */ + br x28 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) From 1d0cb4c8864addc362bae98e8ffa5500c87e1227 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 May 2022 10:35:32 +0100 Subject: [PATCH 414/491] arm64: mte: Ensure the cleared tags are visible before setting the PTE As an optimisation, only pages mapped with PROT_MTE in user space have the MTE tags zeroed. This is done lazily at the set_pte_at() time via mte_sync_tags(). However, this function is missing a barrier and another CPU may see the PTE updated before the zeroed tags are visible. Add an smp_wmb() barrier if the mapping is Normal Tagged. Signed-off-by: Catalin Marinas Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE") Cc: # 5.10.x Reported-by: Vladimir Murzin Cc: Will Deacon Reviewed-by: Steven Price Tested-by: Vladimir Murzin Link: https://lore.kernel.org/r/20220517093532.127095-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/mte.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 78b3e0f8e997c..d502703e8373a 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -76,6 +76,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); } + + /* ensure the tags are visible before the PTE is set */ + smp_wmb(); } int memcmp_pages(struct page *page1, struct page *page2) From 4503cc7fdf9a84cd631b0cb8ecb3c9b1bdbf3594 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 28 Apr 2022 10:33:50 +0200 Subject: [PATCH 415/491] ice: fix crash when writing timestamp on RX rings Do not allow to write timestamps on RX rings if PF is being configured. When PF is being configured RX rings can be freed or rebuilt. If at the same time timestamps are updated, the kernel will crash by dereferencing null RX ring pointer. PID: 1449 TASK: ff187d28ed658040 CPU: 34 COMMAND: "ice-ptp-0000:51" #0 [ff1966a94a713bb0] machine_kexec at ffffffff9d05a0be #1 [ff1966a94a713c08] __crash_kexec at ffffffff9d192e9d #2 [ff1966a94a713cd0] crash_kexec at ffffffff9d1941bd #3 [ff1966a94a713ce8] oops_end at ffffffff9d01bd54 #4 [ff1966a94a713d08] no_context at ffffffff9d06bda4 #5 [ff1966a94a713d60] __bad_area_nosemaphore at ffffffff9d06c10c #6 [ff1966a94a713da8] do_page_fault at ffffffff9d06cae4 #7 [ff1966a94a713de0] page_fault at ffffffff9da0107e [exception RIP: ice_ptp_update_cached_phctime+91] RIP: ffffffffc076db8b RSP: ff1966a94a713e98 RFLAGS: 00010246 RAX: 16e3db9c6b7ccae4 RBX: ff187d269dd3c180 RCX: ff187d269cd4d018 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ff187d269cfcc644 R8: ff187d339b9641b0 R9: 0000000000000000 R10: 0000000000000002 R11: 0000000000000000 R12: ff187d269cfcc648 R13: ffffffff9f128784 R14: ffffffff9d101b70 R15: ff187d269cfcc640 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ff1966a94a713ea0] ice_ptp_periodic_work at ffffffffc076dbef [ice] #9 [ff1966a94a713ee0] kthread_worker_fn at ffffffff9d101c1b #10 [ff1966a94a713f10] kthread at ffffffff9d101b4d #11 [ff1966a94a713f50] ret_from_fork at ffffffff9da0023f Fixes: 77a781155a65 ("ice: enable receive hardware timestamping") Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Michal Schmidt Tested-by: Dave Cain Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index da025c204577f..662947c882e8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -500,12 +500,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) * This function must be called periodically to ensure that the cached value * is never more than 2 seconds old. It must also be called whenever the PHC * time has been changed. + * + * Return: + * * 0 - OK, successfully updated + * * -EAGAIN - PF was busy, need to reschedule the update */ -static void ice_ptp_update_cached_phctime(struct ice_pf *pf) +static int ice_ptp_update_cached_phctime(struct ice_pf *pf) { u64 systime; int i; + if (test_and_set_bit(ICE_CFG_BUSY, pf->state)) + return -EAGAIN; + /* Read the current PHC time */ systime = ice_ptp_read_src_clk_reg(pf, NULL); @@ -528,6 +535,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf) WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); } } + clear_bit(ICE_CFG_BUSY, pf->state); + + return 0; } /** @@ -2330,17 +2340,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work) { struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work); struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); + int err; if (!test_bit(ICE_FLAG_PTP, pf->flags)) return; - ice_ptp_update_cached_phctime(pf); + err = ice_ptp_update_cached_phctime(pf); ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); - /* Run twice a second */ + /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, - msecs_to_jiffies(500)); + msecs_to_jiffies(err ? 10 : 500)); } /** From 31b6298fd8e29effe9ed6b77351ac5969be56ce0 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 28 Apr 2022 14:11:42 -0700 Subject: [PATCH 416/491] ice: fix possible under reporting of ethtool Tx and Rx statistics The hardware statistics counters are not cleared during resets so the drivers first access is to initialize the baseline and then subsequent reads are for reporting the counters. The statistics counters are read during the watchdog subtask when the interface is up. If the baseline is not initialized before the interface is up, then there can be a brief window in which some traffic can be transmitted/received before the initial baseline reading takes place. Directly initialize ethtool statistics in driver open so the baseline will be initialized when the interface is up, and any dropped packets incremented before the interface is up won't be reported. Fixes: 28dc1b86f8ea9 ("ice: ignore dropped packets during init") Signed-off-by: Paul Greenwalt Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 949669fed7d60..963a5f40e071b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6172,9 +6172,10 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_ptp_link_change(pf, pf->hw.pf_id, true); } - /* clear this now, and the first stats read will be used as baseline */ - vsi->stat_offsets_loaded = false; - + /* Perform an initial read of the statistics registers now to + * set the baseline so counters are ready when interface is up + */ + ice_update_eth_stats(vsi); ice_service_task_schedule(pf); return 0; From bf13502ed5f941b0777b3fd1e24dac5d93f3886c Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Sun, 8 May 2022 19:33:48 -0400 Subject: [PATCH 417/491] ice: Fix interrupt moderation settings getting cleared Adaptive-rx and Adaptive-tx are interrupt moderation settings that can be enabled/disabled using ethtool: ethtool -C ethX adaptive-rx on/off adaptive-tx on/off Unfortunately those settings are getting cleared after changing number of queues, or in ethtool world 'channels': ethtool -L ethX rx 1 tx 1 Clearing was happening due to introduction of bit fields in ice_ring_container struct. This way only itr_setting bits were rebuilt during ice_vsi_rebuild_set_coalesce(). Introduce an anonymous struct of bitfields and create a union to refer to them as a single variable. This way variable can be easily saved and restored. Fixes: 61dc79ced7aa ("ice: Restore interrupt throttle settings after VSI rebuild") Signed-off-by: Michal Wilczynski Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 16 ++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.h | 11 ++++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 6d19c58ccacd4..454e01ae09b97 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3043,8 +3043,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; - coalesce[i].itr_tx = q_vector->tx.itr_setting; - coalesce[i].itr_rx = q_vector->rx.itr_setting; + coalesce[i].itr_tx = q_vector->tx.itr_settings; + coalesce[i].itr_rx = q_vector->rx.itr_settings; coalesce[i].intrl = q_vector->intrl; if (i < vsi->num_txq) @@ -3100,21 +3100,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, */ if (i < vsi->alloc_rxq && coalesce[i].rx_valid) { rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[i].itr_rx; + rc->itr_settings = coalesce[i].itr_rx; ice_write_itr(rc, rc->itr_setting); } else if (i < vsi->alloc_rxq) { rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[0].itr_rx; + rc->itr_settings = coalesce[0].itr_rx; ice_write_itr(rc, rc->itr_setting); } if (i < vsi->alloc_txq && coalesce[i].tx_valid) { rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[i].itr_tx; + rc->itr_settings = coalesce[i].itr_tx; ice_write_itr(rc, rc->itr_setting); } else if (i < vsi->alloc_txq) { rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[0].itr_tx; + rc->itr_settings = coalesce[0].itr_tx; ice_write_itr(rc, rc->itr_setting); } @@ -3128,12 +3128,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, for (; i < vsi->num_q_vectors; i++) { /* transmit */ rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[0].itr_tx; + rc->itr_settings = coalesce[0].itr_tx; ice_write_itr(rc, rc->itr_setting); /* receive */ rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[0].itr_rx; + rc->itr_settings = coalesce[0].itr_rx; ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cead3eb149bd5..ffb3f6a589da4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -384,9 +384,14 @@ struct ice_ring_container { /* this matches the maximum number of ITR bits, but in usec * values, so it is shifted left one bit (bit zero is ignored) */ - u16 itr_setting:13; - u16 itr_reserved:2; - u16 itr_mode:1; + union { + struct { + u16 itr_setting:13; + u16 itr_reserved:2; + u16 itr_mode:1; + }; + u16 itr_settings; + }; enum ice_container_type type; }; From aa184e8671f0f911fc2fb3f68cd506e4d7838faa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 May 2022 12:32:05 -0600 Subject: [PATCH 418/491] io_uring: don't attempt to IOPOLL for MSG_RING requests We gate whether to IOPOLL for a request on whether the opcode is allowed on a ring setup for IOPOLL and if it's got a file assigned. MSG_RING is the only one that allows a file yet isn't pollable, it's merely supported to allow communication on an IOPOLL ring, not because we can poll for completion of it. Put the assigned file early and clear it, so we don't attempt to poll for it. Reported-by: syzbot+1a0a53300ce782f8b3ad@syzkaller.appspotmail.com Fixes: 3f1d52abf098 ("io_uring: defer msg-ring file validity check until command issue") Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 91de361ea9aba..e0823f58f7959 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4481,6 +4481,9 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); __io_req_complete(req, issue_flags, ret, 0); + /* put file to avoid an attempt to IOPOLL the req */ + io_put_file(req->file); + req->file = NULL; return 0; } From 69e9cd66ae1392437234a63a3a1d60b6655f92ef Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Tue, 17 May 2022 12:32:53 +0200 Subject: [PATCH 419/491] audit,io_uring,io-wq: call __audit_uring_exit for dummy contexts Not calling the function for dummy contexts will cause the context to not be reset. During the next syscall, this will cause an error in __audit_syscall_entry: WARN_ON(context->context != AUDIT_CTX_UNUSED); WARN_ON(context->name_count); if (context->context != AUDIT_CTX_UNUSED || context->name_count) { audit_panic("unrecoverable error in audit_syscall_entry()"); return; } These problematic dummy contexts are created via the following call chain: exit_to_user_mode_prepare -> arch_do_signal_or_restart -> get_signal -> task_work_run -> tctx_task_work -> io_req_task_submit -> io_issue_sqe -> audit_uring_entry Cc: stable@vger.kernel.org Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Julian Orth [PM: subject line tweaks] Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- kernel/auditsc.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/audit.h b/include/linux/audit.h index d06134ac6245f..cece702311388 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -339,7 +339,7 @@ static inline void audit_uring_entry(u8 op) } static inline void audit_uring_exit(int success, long code) { - if (unlikely(!audit_dummy_context())) + if (unlikely(audit_context())) __audit_uring_exit(success, code); } static inline void audit_syscall_entry(int major, unsigned long a0, diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ea2ee1181921e..f3a2abd6d1a19 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1959,6 +1959,12 @@ void __audit_uring_exit(int success, long code) { struct audit_context *ctx = audit_context(); + if (ctx->dummy) { + if (ctx->context != AUDIT_CTX_URING) + return; + goto out; + } + if (ctx->context == AUDIT_CTX_SYSCALL) { /* * NOTE: See the note in __audit_uring_entry() about the case From d0031e6fbed955ff8d5f5bbc8fe7382482559cec Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 13 Apr 2022 10:13:18 +0300 Subject: [PATCH 420/491] clk: at91: generated: consider range when calculating best rate clk_generated_best_diff() helps in finding the parent and the divisor to compute a rate closest to the required one. However, it doesn't take into account the request's range for the new rate. Make sure the new rate is within the required range. Fixes: 8a8f4bf0c480 ("clk: at91: clk-generated: create function to find best_diff") Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220413071318.244912-1-codrin.ciubotariu@microchip.com Reviewed-by: Claudiu Beznea Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 23cc8297ec4c0..d429ba52a7190 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -117,6 +117,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate; else tmp_rate = parent_rate / div; + + if (tmp_rate < req->min_rate || tmp_rate > req->max_rate) + return; + tmp_diff = abs(req->rate - tmp_rate); if (*best_diff < 0 || *best_diff >= tmp_diff) { From 67c35a3b646cc68598ff0bb28de5f8bd7b2e81b3 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 16 May 2022 15:14:47 +0000 Subject: [PATCH 421/491] parisc: Disable debug code regarding cache flushes in handle_nadtlb_fault() Change the "BUG" to "WARNING" and disable the message because it triggers occasionally in spite of the check in flush_cache_page_if_present. The pte value extracted for the "from" page in copy_user_highpage is racy and occasionally the pte is cleared before the flush is complete. I assume that the page is simultaneously flushed by flush_cache_mm before the pte is cleared as nullifying the fdc doesn't seem to cause problems. I investigated various locking scenarios but I wasn't able to find a way to sequence the flushes. This code is called for every COW break and locks impact performance. This patch is related to the bigger cache flush patch because we need the pte on PA8800/PA8900 to flush using the vma context. I have also seen this from copy_to_user_page and copy_from_user_page. The messages appear infrequently when enabled. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f114e102aaf21..84bc437be5cd1 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -22,6 +22,8 @@ #include +#define DEBUG_NATLB 0 + /* Various important other fields */ #define bit22set(x) (x & 0x00000200) #define bits23_25set(x) (x & 0x000001c0) @@ -450,8 +452,8 @@ handle_nadtlb_fault(struct pt_regs *regs) fallthrough; case 0x380: /* PDC and FIC instructions */ - if (printk_ratelimit()) { - pr_warn("BUG: nullifying cache flush/purge instruction\n"); + if (DEBUG_NATLB && printk_ratelimit()) { + pr_warn("WARNING: nullifying cache flush/purge instruction\n"); show_regs(regs); } if (insn & 0x20) { From 2de8b4cc2051ee1d40eedbcf94de0e7d04507c37 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 16 May 2022 15:14:47 +0000 Subject: [PATCH 422/491] parisc: Rewrite cache flush code for PA8800/PA8900 Originally, I was convinced that we needed to use tmpalias flushes everwhere, for both user and kernel flushes. However, when I modified flush_kernel_dcache_page_addr, to use a tmpalias flush, my c8000 would crash quite early when booting. The PDC returns alias values of 0 for the icache and dcache. This indicates that either the alias boundary is greater than 16MB or equivalent aliasing doesn't work. I modified the tmpalias code to make it easy to try alternate boundaries. I tried boundaries up to 128MB but still kernel tmpalias flushes didn't work on c8000. This led me to conclude that tmpalias flushes don't work on PA8800 and PA8900 machines, and that we needed to flush directly using the virtual address of user and kernel pages. This is likely the major cause of instability on the c8000 and rp34xx machines. Flushing user pages requires doing a temporary context switch as we have to flush pages that don't belong to the current context. Further, we have to deal with pages that aren't present. If a page isn't present, the flush instructions fault on every line. Other code has been rearranged and simplified based on testing. For example, I introduced a flush_cache_dup_mm routine. flush_cache_mm and flush_cache_dup_mm differ in that flush_cache_mm calls purge_cache_pages and flush_cache_dup_mm calls flush_cache_pages. In some implementations, pdc is more efficient than fdc. Based on my testing, I don't believe there's any performance benefit on the c8000. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 31 +-- arch/parisc/include/asm/page.h | 6 +- arch/parisc/kernel/cache.c | 326 ++++++++++++++++++--------- 3 files changed, 236 insertions(+), 127 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index e8b4a03343d39..8d03b3b26229e 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page); flush_kernel_icache_range_asm(s,e); \ } while (0) -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ -do { \ - flush_cache_page(vma, vaddr, page_to_pfn(page)); \ - memcpy(dst, src, len); \ - flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \ -} while (0) - -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ -do { \ - flush_cache_page(vma, vaddr, page_to_pfn(page)); \ - memcpy(dst, src, len); \ -} while (0) - -void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len); +void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len); +void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, + unsigned long pfn); void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma, void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); #define ARCH_HAS_FLUSH_ANON_PAGE -static inline void -flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) -{ - if (PageAnon(page)) { - flush_tlb_page(vma, vmaddr); - preempt_disable(); - flush_dcache_page_asm(page_to_phys(page), vmaddr); - preempt_enable(); - } -} +void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); #define ARCH_HAS_FLUSH_ON_KUNMAP static inline void kunmap_flush_on_unmap(void *addr) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 0561568f7b489..6faaaa3ebe9b8 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -26,12 +26,14 @@ #define copy_page(to, from) copy_page_asm((void *)(to), (void *)(from)) struct page; +struct vm_area_struct; void clear_page_asm(void *page); void copy_page_asm(void *to, void *from); #define clear_user_page(vto, vaddr, page) clear_page_asm(vto) -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg); +void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, + struct vm_area_struct *vma); +#define __HAVE_ARCH_COPY_USER_HIGHPAGE /* * These are used to make use of C type-checking.. diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index e7911225a4f89..0fd04073d4b68 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -27,6 +27,7 @@ #include #include #include +#include int split_tlb __ro_after_init; int dcache_stride __ro_after_init; @@ -91,7 +92,7 @@ static inline void flush_data_cache(void) } -/* Virtual address of pfn. */ +/* Kernel virtual address of pfn. */ #define pfn_va(pfn) __va(PFN_PHYS(pfn)) void @@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m) cache_info.ic_size/1024 ); if (cache_info.dc_loop != 1) snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop); - seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n", + seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n", cache_info.dc_size/1024, (cache_info.dc_conf.cc_wt ? "WT":"WB"), (cache_info.dc_conf.cc_sh ? ", shared I/D":""), - ((cache_info.dc_loop == 1) ? "direct mapped" : buf)); + ((cache_info.dc_loop == 1) ? "direct mapped" : buf), + cache_info.dc_conf.cc_alias + ); seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n", cache_info.it_size, cache_info.dt_size, @@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, preempt_enable(); } -static inline void -__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, - unsigned long physaddr) +static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr) { - if (!static_branch_likely(&parisc_has_cache)) - return; + unsigned long flags, space, pgd, prot; +#ifdef CONFIG_TLB_PTLOCK + unsigned long pgd_lock; +#endif + + vmaddr &= PAGE_MASK; + preempt_disable(); - purge_dcache_page_asm(physaddr, vmaddr); + + /* Set context for flush */ + local_irq_save(flags); + prot = mfctl(8); + space = mfsp(SR_USER); + pgd = mfctl(25); +#ifdef CONFIG_TLB_PTLOCK + pgd_lock = mfctl(28); +#endif + switch_mm_irqs_off(NULL, vma->vm_mm, NULL); + local_irq_restore(flags); + + flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE); if (vma->vm_flags & VM_EXEC) - flush_icache_page_asm(physaddr, vmaddr); + flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE); + flush_tlb_page(vma, vmaddr); + + /* Restore previous context */ + local_irq_save(flags); +#ifdef CONFIG_TLB_PTLOCK + mtctl(pgd_lock, 28); +#endif + mtctl(pgd, 25); + mtsp(space, SR_USER); + mtctl(prot, 8); + local_irq_restore(flags); + preempt_enable(); } +static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) +{ + pte_t *ptep = NULL; + pgd_t *pgd = mm->pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + if (!pgd_none(*pgd)) { + p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) { + pud = pud_offset(p4d, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + ptep = pte_offset_map(pmd, addr); + } + } + } + return ptep; +} + +static inline bool pte_needs_flush(pte_t pte) +{ + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE)) + == (_PAGE_PRESENT | _PAGE_ACCESSED); +} + void flush_dcache_page(struct page *page) { struct address_space *mapping = page_mapping_file(page); struct vm_area_struct *mpnt; unsigned long offset; unsigned long addr, old_addr = 0; + unsigned long count = 0; pgoff_t pgoff; if (mapping && !mapping_mapped(mapping)) { @@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page) pgoff = page->index; - /* We have carefully arranged in arch_get_unmapped_area() that + /* + * We have carefully arranged in arch_get_unmapped_area() that * *any* mappings of a file are always congruently mapped (whether * declared as MAP_PRIVATE or MAP_SHARED), so we only need - * to flush one address here for them all to become coherent */ - + * to flush one address here for them all to become coherent + * on machines that support equivalent aliasing + */ flush_dcache_mmap_lock(mapping); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; + if (parisc_requires_coherency()) { + pte_t *ptep; - /* The TLB is the engine of coherence on parisc: The - * CPU is entitled to speculate any page with a TLB - * mapping, so here we kill the mapping then flush the - * page along a special flush only alias mapping. - * This guarantees that the page is no-longer in the - * cache for any process and nor may it be - * speculatively read in (until the user or kernel - * specifically accesses it, of course) */ - - flush_tlb_page(mpnt, addr); - if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) - != (addr & (SHM_COLOUR - 1))) { - __flush_cache_page(mpnt, addr, page_to_phys(page)); - if (parisc_requires_coherency() && old_addr) - printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file); - old_addr = addr; + ptep = get_ptep(mpnt->vm_mm, addr); + if (ptep && pte_needs_flush(*ptep)) + flush_user_cache_page(mpnt, addr); + } else { + /* + * The TLB is the engine of coherence on parisc: + * The CPU is entitled to speculate any page + * with a TLB mapping, so here we kill the + * mapping then flush the page along a special + * flush only alias mapping. This guarantees that + * the page is no-longer in the cache for any + * process and nor may it be speculatively read + * in (until the user or kernel specifically + * accesses it, of course) + */ + flush_tlb_page(mpnt, addr); + if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) + != (addr & (SHM_COLOUR - 1))) { + __flush_cache_page(mpnt, addr, page_to_phys(page)); + /* + * Software is allowed to have any number + * of private mappings to a page. + */ + if (!(mpnt->vm_flags & VM_SHARED)) + continue; + if (old_addr) + pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", + old_addr, addr, mpnt->vm_file); + old_addr = addr; + } } + WARN_ON(++count == 4096); } flush_dcache_mmap_unlock(mapping); } @@ -403,7 +481,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size; - unsigned long threshold; + unsigned long threshold, threshold2; alltime = mfctl(16); flush_data_cache(); @@ -417,11 +495,16 @@ void __init parisc_setup_cache_timing(void) printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n", alltime, size, rangetime); - threshold = L1_CACHE_ALIGN(size * alltime / rangetime); - if (threshold > cache_info.dc_size) - threshold = cache_info.dc_size; - if (threshold) - parisc_cache_flush_threshold = threshold; + threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime)); + pr_info("Calculated flush threshold is %lu KiB\n", + threshold/1024); + + /* + * The threshold computed above isn't very reliable. The following + * heuristic works reasonably well on c8000/rp3440. + */ + threshold2 = cache_info.dc_size * num_online_cpus(); + parisc_cache_flush_threshold = threshold2; printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", parisc_cache_flush_threshold/1024); @@ -477,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr) } EXPORT_SYMBOL(flush_kernel_dcache_page_addr); -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) +static void flush_cache_page_if_present(struct vm_area_struct *vma, + unsigned long vmaddr, unsigned long pfn) { - /* Copy using kernel mapping. No coherency is needed (all in - kunmap) for the `to' page. However, the `from' page needs to - be flushed through a mapping equivalent to the user mapping - before it can be accessed through the kernel mapping. */ - preempt_disable(); - flush_dcache_page_asm(__pa(vfrom), vaddr); - copy_page_asm(vto, vfrom); - preempt_enable(); + pte_t *ptep = get_ptep(vma->vm_mm, vmaddr); + + /* + * The pte check is racy and sometimes the flush will trigger + * a non-access TLB miss. Hopefully, the page has already been + * flushed. + */ + if (ptep && pte_needs_flush(*ptep)) + flush_cache_page(vma, vmaddr, pfn); +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kfrom = kmap_local_page(from); + kto = kmap_local_page(to); + flush_cache_page_if_present(vma, vaddr, page_to_pfn(from)); + copy_page_asm(kto, kfrom); + kunmap_local(kto); + kunmap_local(kfrom); +} + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) +{ + flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + memcpy(dst, src, len); + flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); +} + +void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) +{ + flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + memcpy(dst, src, len); } -EXPORT_SYMBOL(copy_user_page); /* __flush_tlb_range() * @@ -520,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, return 0; } -static inline unsigned long mm_total_size(struct mm_struct *mm) +static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct vm_area_struct *vma; - unsigned long usize = 0; - - for (vma = mm->mmap; vma; vma = vma->vm_next) - usize += vma->vm_end - vma->vm_start; - return usize; -} - -static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr) -{ - pte_t *ptep = NULL; + unsigned long addr, pfn; + pte_t *ptep; - if (!pgd_none(*pgd)) { - p4d_t *p4d = p4d_offset(pgd, addr); - if (!p4d_none(*p4d)) { - pud_t *pud = pud_offset(p4d, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) - ptep = pte_offset_map(pmd, addr); + for (addr = start; addr < end; addr += PAGE_SIZE) { + /* + * The vma can contain pages that aren't present. Although + * the pte search is expensive, we need the pte to find the + * page pfn and to check whether the page should be flushed. + */ + ptep = get_ptep(vma->vm_mm, addr); + if (ptep && pte_needs_flush(*ptep)) { + if (parisc_requires_coherency()) { + flush_user_cache_page(vma, addr); + } else { + pfn = pte_pfn(*ptep); + if (WARN_ON(!pfn_valid(pfn))) + return; + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); } } } - return ptep; } -static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline unsigned long mm_total_size(struct mm_struct *mm) { - unsigned long addr, pfn; - pte_t *ptep; + struct vm_area_struct *vma; + unsigned long usize = 0; - for (addr = start; addr < end; addr += PAGE_SIZE) { - ptep = get_ptep(mm->pgd, addr); - if (ptep) { - pfn = pte_pfn(*ptep); - flush_cache_page(vma, addr, pfn); - } - } + for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next) + usize += vma->vm_end - vma->vm_start; + return usize; } void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; - /* Flushing the whole cache on each cpu takes forever on - rp3440, etc. So, avoid it if the mm isn't too big. */ - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - mm_total_size(mm) >= parisc_cache_flush_threshold) { - if (mm->context.space_id) - flush_tlb_all(); + /* + * Flushing the whole cache on each cpu takes forever on + * rp3440, etc. So, avoid it if the mm isn't too big. + * + * Note that we must flush the entire cache on machines + * with aliasing caches to prevent random segmentation + * faults. + */ + if (!parisc_requires_coherency() + || mm_total_size(mm) >= parisc_cache_flush_threshold) { + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) + return; + flush_tlb_all(); flush_cache_all(); return; } + /* Flush mm */ for (vma = mm->mmap; vma; vma = vma->vm_next) - flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); + flush_cache_pages(vma, vma->vm_start, vma->vm_end); } -void flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - end - start >= parisc_cache_flush_threshold) { - if (vma->vm_mm->context.space_id) - flush_tlb_range(vma, start, end); + if (!parisc_requires_coherency() + || end - start >= parisc_cache_flush_threshold) { + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) + return; + flush_tlb_range(vma, start, end); flush_cache_all(); return; } - flush_cache_pages(vma, vma->vm_mm, start, end); + flush_cache_pages(vma, start, end); } -void -flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) +void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - if (pfn_valid(pfn)) { - if (likely(vma->vm_mm->context.space_id)) { - flush_tlb_page(vma, vmaddr); - __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); - } else { - __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn)); - } + if (WARN_ON(!pfn_valid(pfn))) + return; + if (parisc_requires_coherency()) + flush_user_cache_page(vma, vmaddr); + else + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +{ + if (!PageAnon(page)) + return; + + if (parisc_requires_coherency()) { + flush_user_cache_page(vma, vmaddr); + return; } + + flush_tlb_page(vma, vmaddr); + preempt_disable(); + flush_dcache_page_asm(page_to_phys(page), vmaddr); + preempt_enable(); } void flush_kernel_vmap_range(void *vaddr, int size) From 798082be69fea995a475ca1db8f9873589e207d9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 16 May 2022 15:32:00 +0000 Subject: [PATCH 423/491] parisc: Fix patch code locking and flushing This change fixes the following: 1) The flags variable is not initialized. Always use raw_spin_lock_irqsave and raw_spin_unlock_irqrestore to serialize patching. 2) flush_kernel_vmap_range is primarily intended for DMA flushes. The whole cache flush in flush_kernel_vmap_range is only possible when interrupts are enabled on SMP machines. Since __patch_text_multiple calls flush_kernel_vmap_range with interrupts disabled, it is better to directly call flush_kernel_dcache_range_asm and flush_kernel_icache_range_asm. 3) The final call to flush_icache_range is unnecessary. Tested with `[PATCH, V3] parisc: Rewrite cache flush code for PA8800/PA8900' change on rp3440, c8000 and c3750 (32 and 64-bit). Note by Helge: This patch had been temporarily reverted shortly before v5.18-rc6 in order to fix boot issues. Now it can be re-applied. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/patch.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index 80a0ab372802d..e59574f65e641 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - if (flags) - raw_spin_lock_irqsave(&patch_lock, *flags); - else - __acquire(&patch_lock); + raw_spin_lock_irqsave(&patch_lock, *flags); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - if (flags) - raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); + raw_spin_unlock_irqrestore(&patch_lock, *flags); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_vmap_range(addr, len); - flush_icache_range(start, end); + flush_kernel_dcache_range_asm(start, end); + flush_kernel_icache_range_asm(start, end); + flush_tlb_kernel_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_vmap_range((void *)fixmap, - (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, + (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, + (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); - flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) From 6e03b13cc7d9427c2c77feed1549191015615202 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Mon, 16 May 2022 11:20:42 +0800 Subject: [PATCH 424/491] drm/dp/mst: fix a possible memory leak in fetch_monitor_name() drm_dp_mst_get_edid call kmemdup to create mst_edid. So mst_edid need to be freed after use. Signed-off-by: Hangyu Hua Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220516032042.13166-1-hbh25y@gmail.com --- drivers/gpu/drm/dp/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/dp/drm_dp_mst_topology.c b/drivers/gpu/drm/dp/drm_dp_mst_topology.c index 11300b53d24fc..7a7cc44686f97 100644 --- a/drivers/gpu/drm/dp/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/dp/drm_dp_mst_topology.c @@ -4852,6 +4852,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr, mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port); drm_edid_get_monitor_name(mst_edid, name, namelen); + kfree(mst_edid); } /** From 6254bd3db316c9ccb3b05caa8b438be63245466f Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 17 May 2022 14:08:16 +0200 Subject: [PATCH 425/491] selinux: fix bad cleanup on error in hashtab_duplicate() The code attempts to free the 'new' pointer using kmem_cache_free(), which is wrong because this function isn't responsible of freeing it. Instead, the function should free new->htable and clear the contents of *new (to prevent double-free). Cc: stable@vger.kernel.org Fixes: c7c556f1e81b ("selinux: refactor changing booleans") Reported-by: Wander Lairson Costa Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- security/selinux/ss/hashtab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 0ae4e4e57a401..3fb8f9026e9be 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -179,7 +179,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, kmem_cache_free(hashtab_node_cachep, cur); } } - kmem_cache_free(hashtab_node_cachep, new); + kfree(new->htable); + memset(new, 0, sizeof(*new)); return -ENOMEM; } From 5361448e45fac6fb96738df748229432a62d78b6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 May 2022 20:07:02 +0200 Subject: [PATCH 426/491] net/qla3xxx: Fix a test in ql_reset_work() test_bit() tests if one bit is set or not. Here the logic seems to check of bit QL_RESET_PER_SCSI (i.e. 4) OR bit QL_RESET_START (i.e. 3) is set. In fact, it checks if bit 7 (4 | 3 = 7) is set, that is to say QL_ADAPTER_UP. This looks harmless, because this bit is likely be set, and when the ql_reset_work() delayed work is scheduled in ql3xxx_isr() (the only place that schedule this work), QL_RESET_START or QL_RESET_PER_SCSI is set. This has been spotted by smatch. Fixes: 5a4faa873782 ("[PATCH] qla3xxx NIC driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/80e73e33f390001d9c0140ffa9baddf6466a41a2.1652637337.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b30589a135c24..06f4d9a9e9388 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3614,7 +3614,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; - if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) { + if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) || + test_bit(QL_RESET_START, &qdev->flags)) { clear_bit(QL_LINK_MASTER, &qdev->flags); /* From 23dd4581350d4ffa23d58976ec46408f8f4c1e16 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 17 May 2022 09:25:30 +0800 Subject: [PATCH 427/491] NFC: nci: fix sleep in atomic context bugs caused by nci_skb_alloc There are sleep in atomic context bugs when the request to secure element of st-nci is timeout. The root cause is that nci_skb_alloc with GFP_KERNEL parameter is called in st_nci_se_wt_timeout which is a timer handler. The call paths that could trigger bugs are shown below: (interrupt context 1) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_skb_alloc(..., GFP_KERNEL) //may sleep (interrupt context 2) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_send_data nci_queue_tx_data_frags nci_skb_alloc(..., GFP_KERNEL) //may sleep This patch changes allocation mode of nci_skb_alloc from GFP_KERNEL to GFP_ATOMIC in order to prevent atomic context sleeping. The GFP_ATOMIC flag makes memory allocation operation could be used in atomic context. Fixes: ed06aeefdac3 ("nfc: st-nci: Rename st21nfcb to st-nci") Signed-off-by: Duoming Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220517012530.75714-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/nfc/nci/data.c | 2 +- net/nfc/nci/hci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 6055dc9a82aa0..aa5e712adf078 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 19703a649b5a6..78c4b6addf15a 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; From 525f447f88b14a64424642de670f77424e067d5b Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 16 May 2022 13:47:21 +0800 Subject: [PATCH 428/491] scsi: target: Fix incorrect use of cpumask_t In commit d72d827f2f26, I used 'cpumask_t' incorrectly: void iscsit_thread_get_cpumask(struct iscsi_conn *conn) { int ord, cpu; cpumask_t conn_allowed_cpumask; ...... } static ssize_t lio_target_wwn_cpus_allowed_list_store( struct config_item *item, const char *page, size_t count) { int ret; char *orig; cpumask_t new_allowed_cpumask; ...... } The correct pattern should be as follows: cpumask_var_t mask; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; ... use 'mask' here ... free_cpumask_var(mask); Link: https://lore.kernel.org/r/20220516054721.1548-1-mingzhe.zou@easystack.cn Fixes: d72d827f2f26 ("scsi: target: Add iscsi/cpus_allowed_list in configfs") Reported-by: Test Bot Reviewed-by: Mike Christie Signed-off-by: Mingzhe Zou Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 32 ++++++++++++++------ drivers/target/iscsi/iscsi_target_configfs.c | 24 +++++++++------ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 6fe6a6bab3f46..ddf6c2a7212bc 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3596,10 +3596,7 @@ static int iscsit_send_reject( void iscsit_thread_get_cpumask(struct iscsi_conn *conn) { int ord, cpu; - cpumask_t conn_allowed_cpumask; - - cpumask_and(&conn_allowed_cpumask, iscsit_global->allowed_cpumask, - cpu_online_mask); + cpumask_var_t conn_allowed_cpumask; /* * bitmap_id is assigned from iscsit_global->ts_bitmap from @@ -3609,13 +3606,28 @@ void iscsit_thread_get_cpumask(struct iscsi_conn *conn) * iSCSI connection's RX/TX threads will be scheduled to * execute upon. */ - cpumask_clear(conn->conn_cpumask); - ord = conn->bitmap_id % cpumask_weight(&conn_allowed_cpumask); - for_each_cpu(cpu, &conn_allowed_cpumask) { - if (ord-- == 0) { - cpumask_set_cpu(cpu, conn->conn_cpumask); - return; + if (!zalloc_cpumask_var(&conn_allowed_cpumask, GFP_KERNEL)) { + ord = conn->bitmap_id % cpumask_weight(cpu_online_mask); + for_each_online_cpu(cpu) { + if (ord-- == 0) { + cpumask_set_cpu(cpu, conn->conn_cpumask); + return; + } + } + } else { + cpumask_and(conn_allowed_cpumask, iscsit_global->allowed_cpumask, + cpu_online_mask); + + cpumask_clear(conn->conn_cpumask); + ord = conn->bitmap_id % cpumask_weight(conn_allowed_cpumask); + for_each_cpu(cpu, conn_allowed_cpumask) { + if (ord-- == 0) { + cpumask_set_cpu(cpu, conn->conn_cpumask); + free_cpumask_var(conn_allowed_cpumask); + return; + } } + free_cpumask_var(conn_allowed_cpumask); } /* * This should never be reached.. diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 0cedcfe207b56..57b4fd56d92ab 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1137,23 +1137,27 @@ static ssize_t lio_target_wwn_cpus_allowed_list_show( static ssize_t lio_target_wwn_cpus_allowed_list_store( struct config_item *item, const char *page, size_t count) { - int ret; + int ret = -ENOMEM; char *orig; - cpumask_t new_allowed_cpumask; + cpumask_var_t new_allowed_cpumask; + + if (!zalloc_cpumask_var(&new_allowed_cpumask, GFP_KERNEL)) + goto out; orig = kstrdup(page, GFP_KERNEL); if (!orig) - return -ENOMEM; + goto out_free_cpumask; - cpumask_clear(&new_allowed_cpumask); - ret = cpulist_parse(orig, &new_allowed_cpumask); + ret = cpulist_parse(orig, new_allowed_cpumask); + if (!ret) + cpumask_copy(iscsit_global->allowed_cpumask, + new_allowed_cpumask); kfree(orig); - if (ret != 0) - return ret; - - cpumask_copy(iscsit_global->allowed_cpumask, &new_allowed_cpumask); - return count; +out_free_cpumask: + free_cpumask_var(new_allowed_cpumask); +out: + return ret ? ret : count; } CONFIGFS_ATTR(lio_target_wwn_, cpus_allowed_list); From 2c5fc6cd269ad3476da99dad02521d2af4a8e906 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 21 Mar 2022 10:07:44 +0200 Subject: [PATCH 429/491] net/mlx5: DR, Fix missing flow_source when creating multi-destination FW table In order to support multiple destination FTEs with SW steering FW table is created with single FTE with multiple actions and SW steering rule forward to it. When creating this table, flow source isn't set according to the original FTE. Fix this by passing the original FTE flow source to the created FW table. Fixes: 34583beea4b7 ("net/mlx5: DR, Create multi-destination table for SW-steering use") Signed-off-by: Maor Dickman Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 3 ++- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 850937cd8bf9c..b52b539c8d2ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -842,7 +842,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; struct mlx5dr_action **ref_actions; @@ -914,7 +915,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, reformat_req, &action->dest_tbl->fw_tbl.id, &action->dest_tbl->fw_tbl.group_id, - ignore_flow_level); + ignore_flow_level, + flow_source); if (ret) goto free_action; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 68a4c32d5f34c..f05ef0cd54bac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; struct mlx5dr_cmd_fte_info fte_info = {}; @@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, fte_info.val = val; fte_info.dest_arr = dest; fte_info.ignore_flow_level = ignore_flow_level; + fte_info.flow_context.flow_source = flow_source; ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 46866a5fc5ca3..98320e3945adb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1461,7 +1461,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, u32 group_id); #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 045b0cf90063b..728f818825892 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -520,6 +520,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } else if (num_term_actions > 1) { bool ignore_flow_level = !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->flow_context.flow_source; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { @@ -529,7 +530,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, term_actions, num_term_actions, - ignore_flow_level); + ignore_flow_level, + flow_source); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index ec5cbec0d4553..7626c85643b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -99,7 +99,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); struct mlx5dr_action *mlx5dr_action_create_drop(void); From b33886971dbc4a86d1ec5369a2aaefc60a7cd72d Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 9 Mar 2022 14:45:58 +0200 Subject: [PATCH 430/491] net/mlx5: Initialize flow steering during driver probe Currently, software objects of flow steering are created and destroyed during reload flow. In case a device is unloaded, the following error is printed during grace period: mlx5_core 0000:00:0b.0: mlx5_fw_fatal_reporter_err_work:690:(pid 95): Driver is in error state. Unloading As a solution to fix use-after-free bugs, where we try to access these objects, when reading the value of flow_steering_mode devlink param[1], let's split flow steering creation and destruction into two routines: * init and cleanup: memory, cache, and pools allocation/free. * create and destroy: namespaces initialization and cleanup. While at it, re-order the cleanup function to mirror the init function. [1] Kasan trace: [ 385.119849 ] BUG: KASAN: use-after-free in mlx5_devlink_fs_mode_get+0x3b/0xa0 [ 385.119849 ] Read of size 4 at addr ffff888104b79308 by task bash/291 [ 385.119849 ] [ 385.119849 ] CPU: 1 PID: 291 Comm: bash Not tainted 5.17.0-rc1+ #2 [ 385.119849 ] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 [ 385.119849 ] Call Trace: [ 385.119849 ] [ 385.119849 ] dump_stack_lvl+0x6e/0x91 [ 385.119849 ] print_address_description.constprop.0+0x1f/0x160 [ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0 [ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0 [ 385.119849 ] kasan_report.cold+0x83/0xdf [ 385.119849 ] ? devlink_param_notify+0x20/0x190 [ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0 [ 385.119849 ] mlx5_devlink_fs_mode_get+0x3b/0xa0 [ 385.119849 ] devlink_nl_param_fill+0x18a/0xa50 [ 385.119849 ] ? _raw_spin_lock_irqsave+0x8d/0xe0 [ 385.119849 ] ? devlink_flash_update_timeout_notify+0xf0/0xf0 [ 385.119849 ] ? __wake_up_common+0x4b/0x1e0 [ 385.119849 ] ? preempt_count_sub+0x14/0xc0 [ 385.119849 ] ? _raw_spin_unlock_irqrestore+0x28/0x40 [ 385.119849 ] ? __wake_up_common_lock+0xe3/0x140 [ 385.119849 ] ? __wake_up_common+0x1e0/0x1e0 [ 385.119849 ] ? __sanitizer_cov_trace_const_cmp8+0x27/0x80 [ 385.119849 ] ? __rcu_read_unlock+0x48/0x70 [ 385.119849 ] ? kasan_unpoison+0x23/0x50 [ 385.119849 ] ? __kasan_slab_alloc+0x2c/0x80 [ 385.119849 ] ? memset+0x20/0x40 [ 385.119849 ] ? __sanitizer_cov_trace_const_cmp4+0x25/0x80 [ 385.119849 ] devlink_param_notify+0xce/0x190 [ 385.119849 ] devlink_unregister+0x92/0x2b0 [ 385.119849 ] remove_one+0x41/0x140 [ 385.119849 ] pci_device_remove+0x68/0x140 [ 385.119849 ] ? pcibios_free_irq+0x10/0x10 [ 385.119849 ] __device_release_driver+0x294/0x3f0 [ 385.119849 ] device_driver_detach+0x82/0x130 [ 385.119849 ] unbind_store+0x193/0x1b0 [ 385.119849 ] ? subsys_interface_unregister+0x270/0x270 [ 385.119849 ] drv_attr_store+0x4e/0x70 [ 385.119849 ] ? drv_attr_show+0x60/0x60 [ 385.119849 ] sysfs_kf_write+0xa7/0xc0 [ 385.119849 ] kernfs_fop_write_iter+0x23a/0x2f0 [ 385.119849 ] ? sysfs_kf_bin_read+0x160/0x160 [ 385.119849 ] new_sync_write+0x311/0x430 [ 385.119849 ] ? new_sync_read+0x480/0x480 [ 385.119849 ] ? _raw_spin_lock+0x87/0xe0 [ 385.119849 ] ? __sanitizer_cov_trace_cmp4+0x25/0x80 [ 385.119849 ] ? security_file_permission+0x94/0xa0 [ 385.119849 ] vfs_write+0x4c7/0x590 [ 385.119849 ] ksys_write+0xf6/0x1e0 [ 385.119849 ] ? __x64_sys_read+0x50/0x50 [ 385.119849 ] ? fpregs_assert_state_consistent+0x99/0xa0 [ 385.119849 ] do_syscall_64+0x3d/0x90 [ 385.119849 ] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 385.119849 ] RIP: 0033:0x7fc36ef38504 [ 385.119849 ] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 385.119849 ] RSP: 002b:00007ffde0ff3d08 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 385.119849 ] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc36ef38504 [ 385.119849 ] RDX: 000000000000000c RSI: 00007fc370521040 RDI: 0000000000000001 [ 385.119849 ] RBP: 00007fc370521040 R08: 00007fc36f00b8c0 R09: 00007fc36ee4b740 [ 385.119849 ] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc36f00a760 [ 385.119849 ] R13: 000000000000000c R14: 00007fc36f005760 R15: 000000000000000c [ 385.119849 ] [ 385.119849 ] [ 385.119849 ] Allocated by task 65: [ 385.119849 ] kasan_save_stack+0x1e/0x40 [ 385.119849 ] __kasan_kmalloc+0x81/0xa0 [ 385.119849 ] mlx5_init_fs+0x11b/0x1160 [ 385.119849 ] mlx5_load+0x13c/0x220 [ 385.119849 ] mlx5_load_one+0xda/0x160 [ 385.119849 ] mlx5_recover_device+0xb8/0x100 [ 385.119849 ] mlx5_health_try_recover+0x2f9/0x3a1 [ 385.119849 ] devlink_health_reporter_recover+0x75/0x100 [ 385.119849 ] devlink_health_report+0x26c/0x4b0 [ 385.275909 ] mlx5_fw_fatal_reporter_err_work+0x11e/0x1b0 [ 385.275909 ] process_one_work+0x520/0x970 [ 385.275909 ] worker_thread+0x378/0x950 [ 385.275909 ] kthread+0x1bb/0x200 [ 385.275909 ] ret_from_fork+0x1f/0x30 [ 385.275909 ] [ 385.275909 ] Freed by task 65: [ 385.275909 ] kasan_save_stack+0x1e/0x40 [ 385.275909 ] kasan_set_track+0x21/0x30 [ 385.275909 ] kasan_set_free_info+0x20/0x30 [ 385.275909 ] __kasan_slab_free+0xfc/0x140 [ 385.275909 ] kfree+0xa5/0x3b0 [ 385.275909 ] mlx5_unload+0x2e/0xb0 [ 385.275909 ] mlx5_unload_one+0x86/0xb0 [ 385.275909 ] mlx5_fw_fatal_reporter_err_work.cold+0xca/0xcf [ 385.275909 ] process_one_work+0x520/0x970 [ 385.275909 ] worker_thread+0x378/0x950 [ 385.275909 ] kthread+0x1bb/0x200 [ 385.275909 ] ret_from_fork+0x1f/0x30 [ 385.275909 ] [ 385.275909 ] The buggy address belongs to the object at ffff888104b79300 [ 385.275909 ] which belongs to the cache kmalloc-128 of size 128 [ 385.275909 ] The buggy address is located 8 bytes inside of [ 385.275909 ] 128-byte region [ffff888104b79300, ffff888104b79380) [ 385.275909 ] The buggy address belongs to the page: [ 385.275909 ] page:00000000de44dd39 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x104b78 [ 385.275909 ] head:00000000de44dd39 order:1 compound_mapcount:0 [ 385.275909 ] flags: 0x8000000000010200(slab|head|zone=2) [ 385.275909 ] raw: 8000000000010200 0000000000000000 dead000000000122 ffff8881000428c0 [ 385.275909 ] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 385.275909 ] page dumped because: kasan: bad access detected [ 385.275909 ] [ 385.275909 ] Memory state around the buggy address: [ 385.275909 ] ffff888104b79200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc [ 385.275909 ] ffff888104b79280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 385.275909 ] >ffff888104b79300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 385.275909 ] ^ [ 385.275909 ] ffff888104b79380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 385.275909 ] ffff888104b79400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 385.275909 ]] Fixes: e890acd5ff18 ("net/mlx5: Add devlink flow_steering_mode parameter") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 131 ++++++++++-------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 6 +- .../net/ethernet/mellanox/mlx5/core/main.c | 15 +- 3 files changed, 91 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 816d991f76210..3ad67e6b5586d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) clean_tree(&root_ns->ns.node); } -void mlx5_cleanup_fs(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - - cleanup_root_ns(steering->root_ns); - cleanup_root_ns(steering->fdb_root_ns); - steering->fdb_root_ns = NULL; - kfree(steering->fdb_sub_ns); - steering->fdb_sub_ns = NULL; - cleanup_root_ns(steering->port_sel_root_ns); - cleanup_root_ns(steering->sniffer_rx_root_ns); - cleanup_root_ns(steering->sniffer_tx_root_ns); - cleanup_root_ns(steering->rdma_rx_root_ns); - cleanup_root_ns(steering->rdma_tx_root_ns); - cleanup_root_ns(steering->egress_root_ns); - mlx5_cleanup_fc_stats(dev); - kmem_cache_destroy(steering->ftes_cache); - kmem_cache_destroy(steering->fgs_cache); - mlx5_ft_pool_destroy(dev); - kfree(steering); -} - static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; @@ -3086,42 +3064,27 @@ static int init_egress_root_ns(struct mlx5_flow_steering *steering) return err; } -int mlx5_init_fs(struct mlx5_core_dev *dev) +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { - struct mlx5_flow_steering *steering; - int err = 0; - - err = mlx5_init_fc_stats(dev); - if (err) - return err; - - err = mlx5_ft_pool_init(dev); - if (err) - return err; - - steering = kzalloc(sizeof(*steering), GFP_KERNEL); - if (!steering) { - err = -ENOMEM; - goto err; - } - - steering->dev = dev; - dev->priv.steering = steering; + struct mlx5_flow_steering *steering = dev->priv.steering; - if (mlx5_fs_dr_is_supported(dev)) - steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; - else - steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + cleanup_root_ns(steering->root_ns); + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; + cleanup_root_ns(steering->port_sel_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->rdma_rx_root_ns); + cleanup_root_ns(steering->rdma_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); +} - steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", - sizeof(struct mlx5_flow_group), 0, - 0, NULL); - steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, - 0, NULL); - if (!steering->ftes_cache || !steering->fgs_cache) { - err = -ENOMEM; - goto err; - } +int mlx5_fs_core_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err = 0; if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || @@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } return 0; + +err: + mlx5_fs_core_cleanup(dev); + return err; +} + +void mlx5_fs_core_free(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); + kfree(steering); + mlx5_ft_pool_destroy(dev); + mlx5_cleanup_fc_stats(dev); +} + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering; + int err = 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + err = mlx5_ft_pool_init(dev); + if (err) + goto err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) { + err = -ENOMEM; + goto err; + } + + steering->dev = dev; + dev->priv.steering = steering; + + if (mlx5_fs_dr_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + + return 0; + err: - mlx5_cleanup_fs(dev); + mlx5_fs_core_free(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index c488a7c5b07e9..3f20523e514fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); -int mlx5_init_fs(struct mlx5_core_dev *dev); -void mlx5_cleanup_fs(struct mlx5_core_dev *dev); +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev); +void mlx5_fs_core_free(struct mlx5_core_dev *dev); +int mlx5_fs_core_init(struct mlx5_core_dev *dev); +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2589e39eb9c72..c6737720bf3a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_sf_table_cleanup; } + err = mlx5_fs_core_alloc(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc flow steering\n"); + goto err_fs; + } + dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) mlx5_core_warn(dev, "Failed to init device memory%d\n", err); @@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; +err_fs: + mlx5_sf_table_cleanup(dev); err_sf_table_cleanup: mlx5_sf_hw_table_cleanup(dev); err_sf_hw_table_cleanup: @@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); + mlx5_fs_core_free(dev); mlx5_sf_table_cleanup(dev); mlx5_sf_hw_table_cleanup(dev); mlx5_vhca_event_cleanup(dev); @@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_tls_start; } - err = mlx5_init_fs(dev); + err = mlx5_fs_core_init(dev); if (err) { mlx5_core_err(dev, "Failed to init flow steering\n"); goto err_fs; @@ -1236,7 +1245,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) err_vhca: mlx5_vhca_event_stop(dev); err_set_hca: - mlx5_cleanup_fs(dev); + mlx5_fs_core_cleanup(dev); err_fs: mlx5_accel_tls_cleanup(dev); err_tls_start: @@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); - mlx5_cleanup_fs(dev); + mlx5_fs_core_cleanup(dev); mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); From 785d7ed295513bd3374095304b7034fd65c123b0 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 3 Apr 2022 23:18:10 +0300 Subject: [PATCH 431/491] net/mlx5: DR, Ignore modify TTL on RX if device doesn't support it When modifying TTL, packet's csum has to be recalculated. Due to HW issue in ConnectX-5, csum recalculation for modify TTL on RX is supported through a work-around that is specifically enabled by configuration. If the work-around isn't enabled, rather than adding an unsupported action the modify TTL action on RX should be ignored. Ignoring modify TTL action might result in zero actions, so in such cases we will not convert the match STE to modify STE, as it is done by FW in DMFS. This patch fixes an issue where modify TTL action was ignored both on RX and TX instead of only on RX. Fixes: 4ff725e1d4ad ("net/mlx5: DR, Ignore modify TTL if device doesn't support it") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_action.c | 65 +++++++++++++------ .../mellanox/mlx5/core/steering/dr_ste_v0.c | 4 +- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index b52b539c8d2ce..1383550f44c12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -530,6 +530,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, return 0; } +static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_actions_attr *attr, + bool rx_rule, + bool *recalc_cs_required) +{ + *recalc_cs_required = false; + + /* if device supports csum recalculation - no adjustment needed */ + if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps)) + return; + + /* no adjustment needed on TX rules */ + if (!rx_rule) + return; + + if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) { + /* Ignore the modify TTL action. + * It is always kept as last HW action. + */ + attr->modify_actions--; + return; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + /* Due to a HW bug on some devices, modifying TTL on RX flows + * will cause an incorrect checksum calculation. In such cases + * we will use a FW table to recalculate the checksum. + */ + *recalc_cs_required = true; +} + static void dr_action_print_sequence(struct mlx5dr_domain *dmn, struct mlx5dr_action *actions[], int last_idx) @@ -650,8 +681,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_MODIFY_HDR: attr.modify_index = action->rewrite->index; attr.modify_actions = action->rewrite->num_of_actions; - recalc_cs_required = action->rewrite->modify_ttl && - !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps); + if (action->rewrite->modify_ttl) + dr_action_modify_ttl_adjust(dmn, &attr, rx_rule, + &recalc_cs_required); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: @@ -732,12 +764,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, *new_hw_ste_arr_sz = nic_matcher->num_of_builders; last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); - /* Due to a HW bug in some devices, modifying TTL on RX flows will - * cause an incorrect checksum calculation. In this case we will - * use a FW table to recalculate. - */ - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && - rx_rule && recalc_cs_required && dest_action) { + if (recalc_cs_required && dest_action) { ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); if (ret) { mlx5dr_err(dmn, @@ -1558,12 +1585,6 @@ dr_action_modify_check_is_ttl_modify(const void *sw_action) return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; } -static bool dr_action_modify_ttl_ignore(struct mlx5dr_domain *dmn) -{ - return !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps) && - !MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify); -} - static int dr_actions_convert_modify_header(struct mlx5dr_action *action, u32 max_hw_actions, u32 num_sw_actions, @@ -1575,6 +1596,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; const struct mlx5dr_ste_action_modify_field *hw_src_action_info; struct mlx5dr_domain *dmn = action->rewrite->dmn; + __be64 *modify_ttl_sw_action = NULL; int ret, i, hw_idx = 0; __be64 *sw_action; __be64 hw_action; @@ -1587,8 +1609,14 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, action->rewrite->allow_rx = 1; action->rewrite->allow_tx = 1; - for (i = 0; i < num_sw_actions; i++) { - sw_action = &sw_actions[i]; + for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) { + /* modify TTL is handled separately, as a last action */ + if (i == num_sw_actions) { + sw_action = modify_ttl_sw_action; + modify_ttl_sw_action = NULL; + } else { + sw_action = &sw_actions[i]; + } ret = dr_action_modify_check_field_limitation(action, sw_action); @@ -1597,10 +1625,9 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, if (!(*modify_ttl) && dr_action_modify_check_is_ttl_modify(sw_action)) { - if (dr_action_modify_ttl_ignore(dmn)) - continue; - + modify_ttl_sw_action = sw_action; *modify_ttl = true; + continue; } /* Convert SW action to HW action */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 5a322335f2043..2010d4ac65190 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -420,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, * encapsulation. The reason for that is that we support * modify headers for outer headers only */ - if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); dr_ste_v0_set_rewrite_actions(last_ste, attr->modify_actions, @@ -513,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, } } - if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) dr_ste_v0_arr_init_next(&last_ste, added_stes, From 379169740b0a955972cebb4994b2607b264a4b41 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 4 Apr 2022 21:51:15 +0300 Subject: [PATCH 432/491] net/mlx5e: Wrap mlx5e_trap_napi_poll into rcu_read_lock The body of mlx5e_napi_poll is wrapped into rcu_read_lock to be able to read the XDP program pointer using rcu_dereference. However, the trap RQ NAPI doesn't use rcu_read_lock, because the trap RQ works only in the non-linear mode, and mlx5e_skb_from_cqe_nonlinear, until recently, didn't support XDP and didn't call rcu_dereference. Starting from the cited commit, mlx5e_skb_from_cqe_nonlinear supports XDP and calls rcu_dereference, but mlx5e_trap_napi_poll doesn't wrap it into rcu_read_lock. It leads to RCU-lockdep warnings like this: WARNING: suspicious RCU usage This commit fixes the issue by adding an rcu_read_lock to mlx5e_trap_napi_poll, similarly to mlx5e_napi_poll. Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index a55b066746cb7..857840ab1e918 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) bool busy = false; int work_done = 0; + rcu_read_lock(); + ch_stats->poll++; work_done = mlx5e_poll_rx_cq(&rq->cq, budget); busy |= work_done == budget; busy |= rq->post_wqes(rq); - if (busy) - return budget; + if (busy) { + work_done = budget; + goto out; + } if (unlikely(!napi_complete_done(napi, work_done))) - return work_done; + goto out; mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); return work_done; } From 15a5078cab30d7aa02ad14bfadebf247d95fc239 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 11 Apr 2022 17:29:08 +0300 Subject: [PATCH 433/491] net/mlx5e: Block rx-gro-hw feature in switchdev mode When the driver is in switchdev mode and rx-gro-hw is set, the RQ needs special CQE handling. Till then, block setting of rx-gro-hw feature in switchdev mode, to avoid failure while setting the feature due to failure while opening the RQ. Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2f1dedc721d1e..9992419617146 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3864,6 +3864,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_NTUPLE) netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + features &= ~NETIF_F_GRO_HW; + if (netdev->features & NETIF_F_GRO_HW) + netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + return features; } From cf6e34c8c22fba66bd21244b95ea47e235f68974 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Apr 2022 18:37:03 +0300 Subject: [PATCH 434/491] net/mlx5e: Properly block LRO when XDP is enabled LRO is incompatible and mutually exclusive with XDP. However, the needed checks are only made when enabling XDP. If LRO is enabled when XDP is already active, the command will succeed, and XDP will be skipped in the data path, although still enabled. This commit fixes the bug by checking the XDP status in mlx5e_fix_features and disabling LRO if XDP is enabled. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9992419617146..3969916cfafed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3900,6 +3900,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) From b0617e7b35001c92c8fa777e1a095d3e693813df Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Apr 2022 18:54:26 +0300 Subject: [PATCH 435/491] net/mlx5e: Properly block HW GRO when XDP is enabled HW GRO is incompatible and mutually exclusive with XDP and XSK. However, the needed checks are only made when enabling XDP. If HW GRO is enabled when XDP is already active, the command will succeed, and XDP will be skipped in the data path, although still enabled. This commit fixes the bug by checking the XDP and XSK status in mlx5e_fix_features and disabling HW GRO if XDP is enabled. Fixes: 83439f3c37aa ("net/mlx5e: Add HW-GRO offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3969916cfafed..6afd07901a108 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3905,6 +3905,18 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "LRO is incompatible with XDP\n"); features &= ~NETIF_F_LRO; } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (priv->xsk.refcnt) { + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_GRO_HW; + } } if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { From 6bbd723035badafe4a8eb17ccdecd96eae7a96d5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 13 Apr 2022 15:50:42 +0300 Subject: [PATCH 436/491] net/mlx5e: Remove HW-GRO from reported features We got reports of certain HW-GRO flows causing kernel call traces, which might be related to firmware. To be on the safe side, disable the feature for now and re-enable it once a driver/firmware fix is found. Fixes: 83439f3c37aa ("net/mlx5e: Add HW-GRO offload") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6afd07901a108..fa229998606c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4873,10 +4873,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (!!MLX5_CAP_GEN(mdev, shampo) && - mlx5e_check_fragmented_striding_rq_cap(mdev)) - netdev->hw_features |= NETIF_F_GRO_HW; - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; From 8e1dcf499a67c494aafff00f25d88320dfec0af3 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 20 Mar 2022 14:02:24 +0200 Subject: [PATCH 437/491] net/mlx5e: CT: Fix support for GRE tuples cited commit removed support for GRE tuples when software steering was enabled. To bring back support for GRE tuples, add GRE ipv4/ipv6 matchers. Fixes: 3ee61ebb0df1 ("net/mlx5: CT: Add software steering ct flow steering provider") Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 59988e24b7041..271261bf1dc2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -23,7 +23,7 @@ struct mlx5_ct_fs_smfs_matcher { }; struct mlx5_ct_fs_smfs_matchers { - struct mlx5_ct_fs_smfs_matcher smfs_matchers[4]; + struct mlx5_ct_fs_smfs_matcher smfs_matchers[6]; struct list_head used; }; @@ -44,7 +44,8 @@ struct mlx5_ct_fs_smfs_rule { }; static inline void -mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp) +mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp, + bool gre) { void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); @@ -77,7 +78,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport); MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, ntohs(MLX5_CT_TCP_FLAGS_MASK)); - } else { + } else if (!gre) { MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport); } @@ -87,7 +88,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo static struct mlx5dr_matcher * mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4, - bool tcp, u32 priority) + bool tcp, bool gre, u32 priority) { struct mlx5dr_matcher *dr_matcher; struct mlx5_flow_spec *spec; @@ -96,7 +97,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, if (!spec) return ERR_PTR(-ENOMEM); - mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp); + mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec); @@ -108,7 +109,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, } static struct mlx5_ct_fs_smfs_matcher * -mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp) +mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre) { struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher; @@ -119,7 +120,7 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp int prio; matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers; - smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp]; + smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre]; if (refcount_inc_not_zero(&smfs_matcher->ref)) return smfs_matcher; @@ -145,11 +146,11 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp } tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl; - dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio); + dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio); if (IS_ERR(dr_matcher)) { netdev_warn(fs->netdev, - "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n", - nat, ipv4, tcp, PTR_ERR(dr_matcher)); + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(dr_matcher)); smfs_matcher = ERR_CAST(dr_matcher); goto out_unlock; @@ -222,16 +223,17 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) static inline bool mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys) { -#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) - const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) | - DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META); - const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP); - const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS); - const u32 ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP); - const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS); +#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) + const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META); + const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS); + const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS); + const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || - used_keys == ipv6_udp); + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); } static bool @@ -254,20 +256,24 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f flow_rule_match_control(flow_rule, &control); flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); - flow_rule_match_ports(flow_rule, &ports); - flow_rule_match_tcp(flow_rule, &tcp); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); if (basic.mask->n_proto != htons(0xFFFF) || (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || basic.mask->ip_proto != 0xFF || - (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) { + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), basic.key->ip_proto, basic.mask->ip_proto); return false; } - if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) { + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", ports.mask->src, ports.mask->dst); return false; @@ -291,7 +297,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, struct mlx5dr_action *actions[5]; struct mlx5dr_rule *rule; int num_actions = 0, err; - bool nat, tcp, ipv4; + bool nat, tcp, ipv4, gre; if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) return ERR_PTR(-EOPNOTSUPP); @@ -314,8 +320,10 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; tcp = MLX5_GET(fte_match_param, spec->match_value, outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; - smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp); + smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre); if (IS_ERR(smfs_matcher)) { err = PTR_ERR(smfs_matcher); goto err_matcher; From 04c551bad3713661ac85902163b3a567864c9a7c Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 7 Apr 2022 13:37:32 +0300 Subject: [PATCH 438/491] net/mlx5e: CT: Fix setting flow_source for smfs ct tuples Cited patch sets flow_source to ANY overriding the provided spec flow_source, avoiding the optimization done by commit c9c079b4deaa ("net/mlx5: CT: Set flow source hint from provided tuple device"). To fix the above, set the dr_rule flow_source from provided flow spec. Fixes: 3ee61ebb0df1 ("net/mlx5: CT: Add software steering ct flow steering provider") Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 271261bf1dc2a..bec9ed0103a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -330,7 +330,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, } rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions, - MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT); + spec->flow_context.flow_source); if (!rule) { err = -EINVAL; goto err_create; From 16d42d313350946f4b9a8b74a13c99f0461a6572 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 Apr 2022 10:47:36 +0300 Subject: [PATCH 439/491] net/mlx5: Drain fw_reset when removing device In case fw sync reset is called in parallel to device removal, device might stuck in the following deadlock: CPU 0 CPU 1 ----- ----- remove_one uninit_one (locks intf_state_mutex) mlx5_sync_reset_now_event() work in fw_reset->wq. mlx5_enter_error_state() mutex_lock (intf_state_mutex) cleanup_once fw_reset_cleanup() destroy_workqueue(fw_reset->wq) Drain the fw_reset WQ, and make sure no new work is being queued, before entering uninit_one(). The Drain is done before devlink_unregister() since fw_reset, in some flows, is using devlink API devlink_remote_reload_actions_performed(). Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 25 ++++++++++++++++--- .../ethernet/mellanox/mlx5/core/fw_reset.h | 1 + .../net/ethernet/mellanox/mlx5/core/main.c | 4 +++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ca1aba845dd6d..81eb67fb95b04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -8,7 +8,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, - MLX5_FW_RESET_FLAGS_PENDING_COMP + MLX5_FW_RESET_FLAGS_PENDING_COMP, + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS }; struct mlx5_fw_reset { @@ -208,7 +209,10 @@ static void poll_sync_reset(struct timer_list *t) if (fatal_error) { mlx5_core_warn(dev, "Got Device Reset\n"); - queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + else + mlx5_core_err(dev, "Device is being removed, Drop new reset work\n"); return; } @@ -433,9 +437,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); struct mlx5_eqe *eqe = data; + if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + return NOTIFY_DONE; + switch (eqe->sub_type) { case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: - queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); break; case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: mlx5_sync_reset_events_handle(fw_reset, eqe); @@ -479,6 +486,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); } +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); + cancel_work_sync(&fw_reset->fw_live_patch_work); + cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_reload_work); + cancel_work_sync(&fw_reset->reset_now_work); + cancel_work_sync(&fw_reset->reset_abort_work); +} + int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index 694fc7cb26845..dc141c7e641a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -16,6 +16,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev); int mlx5_fw_reset_init(struct mlx5_core_dev *dev); void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c6737720bf3a2..ef196cb764e2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1627,6 +1627,10 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain + * fw_reset before unregistering the devlink. + */ + mlx5_drain_fw_reset(dev); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_crdump_disable(dev); From a91714312eb16f9ecd1f7f8b3efe1380075f28d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 May 2022 02:13:40 -0400 Subject: [PATCH 440/491] percpu_ref_init(): clean ->percpu_count_ref on failure That way percpu_ref_exit() is safe after failing percpu_ref_init(). At least one user (cgroup_create()) had a double-free that way; there might be other similar bugs. Easier to fix in percpu_ref_init(), rather than playing whack-a-mole in sloppy users... Usual symptoms look like a messed refcounting in one of subsystems that use percpu allocations (might be percpu-refcount, might be something else). Having refcounts for two different objects share memory is Not Nice(tm)... Reported-by: syzbot+5b1e53987f858500ec00@syzkaller.appspotmail.com Signed-off-by: Al Viro --- lib/percpu-refcount.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index af9302141bcf6..e5c5315da2741 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, data = kzalloc(sizeof(*ref->data), gfp); if (!data) { free_percpu((void __percpu *)ref->percpu_count_ptr); + ref->percpu_count_ptr = 0; return -ENOMEM; } From 4dc2a5a8f6754492180741facf2a8787f2c415d7 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 17 May 2022 17:42:31 +0800 Subject: [PATCH 441/491] net: af_key: add check for pfkey_broadcast in function pfkey_process If skb_clone() returns null pointer, pfkey_broadcast() will return error. Therefore, it should be better to check the return value of pfkey_broadcast() and return error if fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jiasheng Jiang Signed-off-by: Steffen Klassert --- net/key/af_key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index fd51db3be91c4..92e9d75dba2f4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + if (err) + return err; memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); From 015c44d7bff3f44d569716117becd570c179ca32 Mon Sep 17 00:00:00 2001 From: Thomas Bartschies Date: Wed, 18 May 2022 08:32:18 +0200 Subject: [PATCH 442/491] net: af_key: check encryption module availability consistency Since the recent introduction supporting the SM3 and SM4 hash algos for IPsec, the kernel produces invalid pfkey acquire messages, when these encryption modules are disabled. This happens because the availability of the algos wasn't checked in all necessary functions. This patch adds these checks. Signed-off-by: Thomas Bartschies Signed-off-by: Steffen Klassert --- net/key/af_key.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 92e9d75dba2f4..339d95df19d32 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2900,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2918,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg))) + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { @@ -2929,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } } From 0dc14aa94ccd8ba35eb17a0f9b123d1566efd39e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:44:51 +0100 Subject: [PATCH 443/491] ARM: 9196/1: spectre-bhb: enable for Cortex-A15 The Spectre-BHB mitigations were inadvertently left disabled for Cortex-A15, due to the fact that cpu_v7_bugs_init() is not called in that case. So fix that. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/mm/proc-v7-bugs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 06dbfb968182d..fb9f3eb6bf483 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) From 3cfb3019979666bdf33a1010147363cf05e0f17b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:46:17 +0100 Subject: [PATCH 444/491] ARM: 9197/1: spectre-bhb: fix loop8 sequence for Thumb2 In Thumb2, 'b . + 4' produces a branch instruction that uses a narrow encoding, and so it does not jump to the following instruction as expected. So use W(b) instead. Fixes: 6c7cb60bff7a ("ARM: fix Thumb2 regression with Spectre BHB") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 06508698abb85..7a8682468a847 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1145,7 +1145,7 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -3: b . + 4 +3: W(b) . + 4 subs r0, r0, #1 bne 3b dsb From ba2c89e0ea74a904d5231643245753d77422e7f5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 17 May 2022 11:02:11 -0700 Subject: [PATCH 445/491] mptcp: fix checksum byte order The MPTCP code typecasts the checksum value to u16 and then converts it to big endian while storing the value into the MPTCP option. As a result, the wire encoding for little endian host is wrong, and that causes interoperabilty interoperability issues with other implementation or host with different endianness. Address the issue writing in the packet the unmodified __sum16 value. MPTCP checksum is disabled by default, interoperating with systems with bad mptcp-level csum encoding should cause fallback to TCP. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/275 Fixes: c5b39e26d003 ("mptcp: send out checksum for DSS") Fixes: 390b95a5fb84 ("mptcp: receive checksum for DSS") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 36 ++++++++++++++++++++++++------------ net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 325383646f5c0..b548cec86c9d8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } @@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); ptr += 2; } @@ -1240,7 +1240,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; @@ -1256,15 +1256,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) header.csum = 0; csum = csum_partial(&header, sizeof(header), sum); - return (__force u16)csum_fold(csum); + return csum_fold(csum); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, ~csum_unfold(mpext->csum)); } +static void put_len_csum(u16 len, __sum16 csum, void *data) +{ + __sum16 *sumptr = data + 2; + __be16 *ptr = data; + + put_unaligned_be16(len, ptr); + + put_unaligned(csum, sumptr); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1340,8 +1350,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; if (opts->csum_reqd) { - put_unaligned_be32(mpext->data_len << 16 | - mptcp_make_csum(mpext), ptr); + put_len_csum(mpext->data_len, + mptcp_make_csum(mpext), + ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); @@ -1392,11 +1403,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->data_len << 16 | - __mptcp_make_csum(opts->data_seq, - opts->subflow_seq, - opts->data_len, - ~csum_unfold(opts->csum)), ptr); + put_len_csum(opts->data_len, + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + ~csum_unfold(opts->csum)), + ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f4ce28bb0fdcc..fb40dd676a26e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -723,7 +723,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8c37087f0d846..e90fe7eec43ac 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -888,7 +888,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); u32 offset, seq, delta; - u16 csum; + __sum16 csum; int len; if (!csum_reqd) From ae66fb2ba6c3dcaf8b9612b65aa949a1a4bed150 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 17 May 2022 11:02:12 -0700 Subject: [PATCH 446/491] mptcp: Do TCP fallback on early DSS checksum failure RFC 8684 section 3.7 describes several opportunities for a MPTCP connection to "fall back" to regular TCP early in the connection process, before it has been confirmed that MPTCP options can be successfully propagated on all SYN, SYN/ACK, and data packets. If a peer acknowledges the first received data packet with a regular TCP header (no MPTCP options), fallback is allowed. If the recipient of that first data packet finds a MPTCP DSS checksum error, this provides an opportunity to fail gracefully with a TCP fallback rather than resetting the connection (as might happen if a checksum failure were detected later). This commit modifies the checksum failure code to attempt fallback on the initial subflow of a MPTCP connection, only if it's a failure in the first data mapping. In cases where the peer initiates the connection, requests checksums, is the first to send data, and the peer is sending incorrect checksums (see https://github.com/multipath-tcp/mptcp_net-next/issues/275), this allows the connection to proceed as TCP rather than reset. Fixes: dd8bcd1768ff ("mptcp: validate the data checksum") Acked-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fb40dd676a26e..5655a63aa6a8b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -443,7 +443,8 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1; /* local_id is correctly initialized */ + local_id_valid : 1, /* local_id is correctly initialized */ + valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e90fe7eec43ac..be76ada89d969 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -955,11 +955,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + if (subflow->mp_join || subflow->valid_csum_seen) { + subflow->send_mp_fail = 1; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + } return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; } + subflow->valid_csum_seen = 1; return MAPPING_OK; } @@ -1141,6 +1144,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss } } +static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) +{ + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + if (subflow->mp_join) + return false; + else if (READ_ONCE(msk->csum_enabled)) + return !subflow->valid_csum_seen; + else + return !subflow->fully_established; +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1218,7 +1233,7 @@ static bool subflow_check_data_avail(struct sock *ssk) return true; } - if (subflow->mp_join || subflow->fully_established) { + if (!subflow_can_fallback(subflow)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ From b8cedb7093b2d1394cae9b86494cba4b62d3a30a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 18 May 2022 18:53:21 +0800 Subject: [PATCH 447/491] nfc: pn533: Fix buggy cleanup order When removing the pn533 device (i2c or USB), there is a logic error. The original code first cancels the worker (flush_delayed_work) and then destroys the workqueue (destroy_workqueue), leaving the timer the last one to be deleted (del_timer). This result in a possible race condition in a multi-core preempt-able kernel. That is, if the cleanup (pn53x_common_clean) is concurrently run with the timer handler (pn533_listen_mode_timer), the timer can queue the poll_work to the already destroyed workqueue, causing use-after-free. This patch reorder the cleanup: it uses the del_timer_sync to make sure the handler is finished before the routine will destroy the workqueue. Note that the timer cannot be activated by the worker again. static void pn533_wq_poll(struct work_struct *work) ... rc = pn533_send_poll_frame(dev); if (rc) return; if (cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, ...); That is, the mod_timer can be called only when pn533_send_poll_frame() returns no error, which is impossible because the device is detaching and the lower driver should return ENODEV code. Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- drivers/nfc/pn533/pn533.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index a491db46e3bd4..d9f6367b9993d 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2787,13 +2787,14 @@ void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; + /* delete the timer before cleanup the worker */ + del_timer_sync(&priv->listen_timer); + flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); skb_queue_purge(&priv->resp_q); - del_timer(&priv->listen_timer); - list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) { list_del(&cmd->queue); kfree(cmd); From 942d2ad5d2e0df758a645ddfadffde2795322728 Mon Sep 17 00:00:00 2001 From: Kevin Mitchell Date: Tue, 17 May 2022 11:01:05 -0700 Subject: [PATCH 448/491] igb: skip phy status check where unavailable igb_read_phy_reg() will silently return, leaving phy_data untouched, if hw->ops.read_reg isn't set. Depending on the uninitialized value of phy_data, this led to the phy status check either succeeding immediately or looping continuously for 2 seconds before emitting a noisy err-level timeout. This message went out to the console even though there was no actual problem. Instead, first check if there is read_reg function pointer. If not, proceed without trying to check the phy status register. Fixes: b72f3f72005d ("igb: When GbE link up, wait for Remote receiver status condition") Signed-off-by: Kevin Mitchell Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 34b33b21e0dcd..68be2976f539f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5505,7 +5505,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ From 6fd45e79e8b93b8d22fb8fe22c32fbad7e9190bd Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 May 2022 18:52:17 +0930 Subject: [PATCH 449/491] net: ftgmac100: Disable hardware checksum on AST2600 The AST2600 when using the i210 NIC over NC-SI has been observed to produce incorrect checksum results with specific MTU values. This was first observed when sending data across a long distance set of networks. On a local network, the following test was performed using a 1MB file of random data. On the receiver run this script: #!/bin/bash while [ 1 ]; do # Zero the stats nstat -r > /dev/null nc -l 9899 > test-file # Check for checksum errors TcpInCsumErrors=$(nstat | grep TcpInCsumErrors) if [ -z "$TcpInCsumErrors" ]; then echo No TcpInCsumErrors else echo TcpInCsumErrors = $TcpInCsumErrors fi done On an AST2600 system: # nc 9899 < test-file The test was repeated with various MTU values: # ip link set mtu 1410 dev eth0 The observed results: 1500 - good 1434 - bad 1400 - good 1410 - bad 1420 - good The test was repeated after disabling tx checksumming: # ethtool -K eth0 tx-checksumming off And all MTU values tested resulted in transfers without error. An issue with the driver cannot be ruled out, however there has been no bug discovered so far. David has done the work to take the original bug report of slow data transfer between long distance connections and triaged it down to this test case. The vendor suspects this this is a hardware issue when using NC-SI. The fixes line refers to the patch that introduced AST2600 support. Reported-by: David Wilder Reviewed-by: Dylan Hung Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index caf48023f8ea5..5231818943c6e 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev) /* AST2400 doesn't have working HW checksum generation */ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac"))) netdev->hw_features &= ~NETIF_F_HW_CSUM; + + /* AST2600 tx checksum with NCSI is broken */ + if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac")) + netdev->hw_features &= ~NETIF_F_HW_CSUM; + if (np && of_get_property(np, "no-hw-checksum", NULL)) netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM); netdev->features |= netdev->hw_features; From e5eaac2beb54f0a16ff851125082d9faeb475572 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 May 2022 10:44:14 +0200 Subject: [PATCH 450/491] netfilter: flowtable: fix TCP flow teardown This patch addresses three possible problems: 1. ct gc may race to undo the timeout adjustment of the packet path, leaving the conntrack entry in place with the internal offload timeout (one day). 2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE timeout is reached before the flow offload del. 3. tcp ct is always set to ESTABLISHED with a very long timeout in flow offload teardown/delete even though the state might be already CLOSED. Also as a remark we cannot assume that the FIN or RST packet is hitting flow table teardown as the packet might get bumped to the slow path in nftables. This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN state transition. Moreover, teturn the connection's ownership to conntrack upon teardown by clearing the offload flag and fixing the established timeout value. The flow table GC thread will asynchonrnously free the flow table and hardware offload entries. Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows on which is also misleading since the flow is back to classic conntrack path. If nf_ct_delete() removes the entry from the conntrack table, then it calls nf_ct_put() which decrements the refcnt. This is not a problem because the flowtable holds a reference to the conntrack object from flow_offload_alloc() path which is released via flow_offload_free(). This patch also updates nft_flow_offload to skip packets in SYN_RECV state. Since we might miss or bump packets to slow path, we do not know what will happen there while we are still in SYN_RECV, this patch postpones offload up to the next packet which also aligns to the existing behaviour in tc-ct. flow_offload_teardown() does not reset the existing tcp state from flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bump to slow path might have already update the state to CLOSE/FIN. Joint work with Oz and Sven. Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race") Signed-off-by: Oz Shlomo Signed-off-by: Sven Auhagen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 33 +++++++----------------------- net/netfilter/nft_flow_offload.c | 3 ++- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 20b4a14e5d4ea..ebdf5332e8383 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init); static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) { - tcp->state = TCP_CONNTRACK_ESTABLISHED; tcp->seen[0].td_maxwin = 0; tcp->seen[1].td_maxwin = 0; } -static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); @@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + flow_offload_fixup_tcp(&ct->proto.tcp); + + timeout = tn->timeouts[ct->proto.tcp.state]; timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); @@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } -static void flow_offload_fixup_ct_state(struct nf_conn *ct) -{ - if (nf_ct_protonum(ct) == IPPROTO_TCP) - flow_offload_fixup_tcp(&ct->proto.tcp); -} - -static void flow_offload_fixup_ct(struct nf_conn *ct) -{ - flow_offload_fixup_ct_state(ct); - flow_offload_fixup_ct_timeout(ct); -} - static void flow_offload_route_release(struct flow_offload *flow) { nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); @@ -361,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table, rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, nf_flow_offload_rhash_params); - - clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - - if (nf_flow_has_expired(flow)) - flow_offload_fixup_ct(flow->ct); - else - flow_offload_fixup_ct_timeout(flow->ct); - flow_offload_free(flow); } void flow_offload_teardown(struct flow_offload *flow) { + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); set_bit(NF_FLOW_TEARDOWN, &flow->flags); - - flow_offload_fixup_ct_state(flow->ct); + flow_offload_fixup_ct(flow->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -466,7 +447,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_has_stale_dst(flow)) - set_bit(NF_FLOW_TEARDOWN, &flow->flags); + flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 187b8cb9a5103..6f0b07fe648d0 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -298,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, case IPPROTO_TCP: tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(_tcph), &_tcph); - if (unlikely(!tcph || tcph->fin || tcph->rst)) + if (unlikely(!tcph || tcph->fin || tcph->rst || + !nf_conntrack_tcp_established(ct))) goto out; break; case IPPROTO_UDP: From 2738d9d963bd1f06d5114c2b4fa5771a95703991 Mon Sep 17 00:00:00 2001 From: Ritaro Takenaka Date: Tue, 17 May 2022 12:55:30 +0200 Subject: [PATCH 451/491] netfilter: flowtable: move dst_check to packet path Fixes sporadic IPv6 packet loss when flow offloading is enabled. IPv6 route GC and flowtable GC are not synchronized. When dst_cache becomes stale and a packet passes through the flow before the flowtable GC teardowns it, the packet can be dropped. So, it is necessary to check dst every time in packet path. Fixes: 227e1e4d0d6c ("netfilter: nf_flowtable: skip device lookup from interface index") Signed-off-by: Ritaro Takenaka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 23 +---------------------- net/netfilter/nf_flow_table_ip.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index ebdf5332e8383..f2def06d10709 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -421,32 +421,11 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple) -{ - struct dst_entry *dst; - - if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || - tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { - dst = tuple->dst_cache; - if (!dst_check(dst, tuple->dst_cookie)) - return true; - } - - return false; -} - -static bool nf_flow_has_stale_dst(struct flow_offload *flow) -{ - return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) || - flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); -} - static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct) || - nf_flow_has_stale_dst(flow)) + nf_ct_is_dying(flow->ct)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 32c0eb1b48212..b350fe9d00b0b 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } +static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple) +{ + if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH && + tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM) + return true; + + return dst_check(tuple->dst_cache, tuple->dst_cookie); +} + static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, const struct nf_hook_state *state, struct dst_entry *dst) @@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; @@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; From 9e539c5b6d9c5b996e45105921ee9dd955c0f535 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 May 2022 14:51:34 +0200 Subject: [PATCH 452/491] netfilter: nf_tables: disable expression reduction infra Either userspace or kernelspace need to pre-fetch keys inconditionally before comparisons for this to work. Otherwise, register tracking data is misleading and it might result in reducing expressions which are not yet registers. First expression is also guaranteed to be evaluated always, however, certain expressions break before writing data to registers, before comparing the data, leaving the register in undetermined state. This patch disables this infrastructure by now. Fixes: b2d306542ff9 ("netfilter: nf_tables: do not reduce read-only expressions") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 16c3a39689f47..a096b9fbbbdff 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static bool nft_expr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { - if (!expr->ops->reduce) { - pr_warn_once("missing reduce for expression %s ", - expr->ops->type->name); - return false; - } - - if (nft_reduce_is_readonly(expr)) - return false; - - return expr->ops->reduce(track, expr); + return false; } static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) From acde3929492bcb9ceb0df1270230c422b1013798 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 16 May 2022 11:47:35 +0300 Subject: [PATCH 453/491] vdpa/mlx5: Use consistent RQT size The current code evaluates RQT size based on the configured number of virtqueues. This can raise an issue in the following scenario: Assume MQ was negotiated. 1. mlx5_vdpa_set_map() gets called. 2. handle_ctrl_mq() is called setting cur_num_vqs to some value, lower than the configured max VQs. 3. A second set_map gets called, but now a smaller number of VQs is used to evaluate the size of the RQT. 4. handle_ctrl_mq() is called with a value larger than what the RQT can hold. This will emit errors and the driver state is compromised. To fix this, we use a new field in struct mlx5_vdpa_net to hold the required number of entries in the RQT. This value is evaluated in mlx5_vdpa_set_driver_features() where we have the negotiated features all set up. In addition to that, we take into consideration the max capability of RQT entries early when the device is added so we don't need to take consider it when creating the RQT. Last, we remove the use of mlx5_vdpa_max_qps() which just returns the max_vas / 2 and make the code clearer. Fixes: 52893733f2c5 ("vdpa/mlx5: Add multiqueue support") Acked-by: Jason Wang Signed-off-by: Eli Cohen Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 61 +++++++++++-------------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 79001301b3832..e0de44000d92d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -161,6 +161,7 @@ struct mlx5_vdpa_net { struct mlx5_flow_handle *rx_rule_mcast; bool setup; u32 cur_num_vqs; + u32 rqt_size; struct notifier_block nb; struct vdpa_callback config_cb; struct mlx5_vdpa_wq_ent cvq_ent; @@ -204,17 +205,12 @@ static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val); } -static inline u32 mlx5_vdpa_max_qps(int max_vqs) -{ - return max_vqs / 2; -} - static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev) { if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) return 2; - return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); + return mvdev->max_vqs; } static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx) @@ -1236,25 +1232,13 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * static int create_rqt(struct mlx5_vdpa_net *ndev) { __be32 *list; - int max_rqt; void *rqtc; int inlen; void *in; int i, j; int err; - int num; - - if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) - num = 1; - else - num = ndev->cur_num_vqs / 2; - max_rqt = min_t(int, roundup_pow_of_two(num), - 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); - if (max_rqt < 1) - return -EOPNOTSUPP; - - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1263,12 +1247,12 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); - MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); + MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; i < max_rqt; i++, j += 2) - list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id); + for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) + list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); - MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); + MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); kfree(in); if (err) @@ -1282,19 +1266,13 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) { __be32 *list; - int max_rqt; void *rqtc; int inlen; void *in; int i, j; int err; - max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2), - 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); - if (max_rqt < 1) - return -EOPNOTSUPP; - - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1305,10 +1283,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; i < max_rqt; i++, j += 2) + for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); - MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); + MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); kfree(in); if (err) @@ -1625,7 +1603,7 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || - newqps > mlx5_vdpa_max_qps(mvdev->max_vqs)) + newqps > ndev->rqt_size) break; if (ndev->cur_num_vqs == 2 * newqps) { @@ -1989,7 +1967,7 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) int err; int i; - for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) { + for (i = 0; i < mvdev->max_vqs; i++) { err = setup_vq(ndev, &ndev->vqs[i]); if (err) goto err_vq; @@ -2060,9 +2038,11 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) - ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); + ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); else - ndev->cur_num_vqs = 2; + ndev->rqt_size = 1; + + ndev->cur_num_vqs = 2 * ndev->rqt_size; update_cvq_info(mvdev); return err; @@ -2529,7 +2509,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) struct mlx5_vdpa_virtqueue *mvq; int i; - for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) { + for (i = 0; i < ndev->mvdev.max_vqs; ++i) { mvq = &ndev->vqs[i]; memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); mvq->index = i; @@ -2671,7 +2651,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, return -EOPNOTSUPP; } - max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); + max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), + 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); if (max_vqs < 2) { dev_warn(mdev->device, "%d virtqueues are supported. At least 2 are required\n", @@ -2742,7 +2723,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); } - config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs)); + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) @@ -2769,7 +2750,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->nb.notifier_call = event_handler; mlx5_notifier_register(mdev, &ndev->nb); mvdev->vdev.mdev = &mgtdev->mgtdev; - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1); + err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); if (err) goto err_reg; From fb4554c2232e44d595920f4d5c66cf8f7d13f9bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 May 2022 16:42:13 +0800 Subject: [PATCH 454/491] Fix double fget() in vhost_net_set_backend() Descriptor table is a shared resource; two fget() on the same descriptor may return different struct file references. get_tap_ptr_ring() is called after we'd found (and pinned) the socket we'll be using and it tries to find the private tun/tap data structures associated with it. Redoing the lookup by the same file descriptor we'd used to get the socket is racy - we need to same struct file. Thanks to Jason for spotting a braino in the original variant of patch - I'd missed the use of fd == -1 for disabling backend, and in that case we can end up with sock == NULL and sock != oldsock. Cc: stable@kernel.org Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Al Viro --- drivers/vhost/net.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 792ab5f236471..297b5db474545 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1450,13 +1450,9 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; From 75dbb685f4e8786c33ddef8279bab0eadfb0731f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 14 May 2022 12:16:47 +0200 Subject: [PATCH 455/491] libceph: fix potential use-after-free on linger ping and resends request_reinit() is not only ugly as the comment rightfully suggests, but also unsafe. Even though it is called with osdc->lock held for write in all cases, resetting the OSD request refcount can still race with handle_reply() and result in use-after-free. Taking linger ping as an example: handle_timeout thread handle_reply thread down_read(&osdc->lock) req = lookup_request(...) ... finish_request(req) # unregisters up_read(&osdc->lock) __complete_request(req) linger_ping_cb(req) # req->r_kref == 2 because handle_reply still holds its ref down_write(&osdc->lock) send_linger_ping(lreq) req = lreq->ping_req # same req # cancel_linger_request is NOT # called - handle_reply already # unregistered request_reinit(req) WARN_ON(req->r_kref != 1) # fires request_init(req) kref_init(req->r_kref) # req->r_kref == 1 after kref_init ceph_osdc_put_request(req) kref_put(req->r_kref) # req->r_kref == 0 after kref_put, req is freed !!! This happens because send_linger_ping() always (re)uses the same OSD request for watch ping requests, relying on cancel_linger_request() to unregister it from the OSD client and rip its messages out from the messenger. send_linger() does the same for watch/notify registration and watch reconnect requests. Unfortunately cancel_request() doesn't guarantee that after it returns the OSD client would be completely done with the OSD request -- a ref could still be held and the callback (if specified) could still be invoked too. The original motivation for request_reinit() was inability to deal with allocation failures in send_linger() and send_linger_ping(). Switching to using osdc->req_mempool (currently only used by CephFS) respects that and allows us to get rid of request_reinit(). Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li Acked-by: Jeff Layton --- include/linux/ceph/osd_client.h | 3 + net/ceph/osd_client.c | 302 +++++++++++++------------------- 2 files changed, 122 insertions(+), 183 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 3431011f364dd..cba8a6ffc3290 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -287,6 +287,9 @@ struct ceph_osd_linger_request { rados_watcherrcb_t errcb; void *data; + struct ceph_pagelist *request_pl; + struct page **notify_id_pages; + struct page ***preply_pages; size_t *preply_len; }; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 83eb97c94e834..4b88f2a4a6e24 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req) target_init(&req->r_t); } -/* - * This is ugly, but it allows us to reuse linger registration and ping - * requests, keeping the structure of the code around send_linger{_ping}() - * reasonable. Setting up a min_nr=2 mempool for each linger request - * and dealing with copying ops (this blasts req only, watch op remains - * intact) isn't any better. - */ -static void request_reinit(struct ceph_osd_request *req) -{ - struct ceph_osd_client *osdc = req->r_osdc; - bool mempool = req->r_mempool; - unsigned int num_ops = req->r_num_ops; - u64 snapid = req->r_snapid; - struct ceph_snap_context *snapc = req->r_snapc; - bool linger = req->r_linger; - struct ceph_msg *request_msg = req->r_request; - struct ceph_msg *reply_msg = req->r_reply; - - dout("%s req %p\n", __func__, req); - WARN_ON(kref_read(&req->r_kref) != 1); - request_release_checks(req); - - WARN_ON(kref_read(&request_msg->kref) != 1); - WARN_ON(kref_read(&reply_msg->kref) != 1); - target_destroy(&req->r_t); - - request_init(req); - req->r_osdc = osdc; - req->r_mempool = mempool; - req->r_num_ops = num_ops; - req->r_snapid = snapid; - req->r_snapc = snapc; - req->r_linger = linger; - req->r_request = request_msg; - req->r_reply = reply_msg; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_ops, @@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init); * @watch_opcode: CEPH_OSD_WATCH_OP_* */ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, - u64 cookie, u8 watch_opcode) + u8 watch_opcode, u64 cookie, u32 gen) { struct ceph_osd_req_op *op; op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; - op->watch.gen = 0; + op->watch.gen = gen; +} + +/* + * prot_ver, timeout and notify payload (may be empty) should already be + * encoded in @request_pl + */ +static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, + u64 cookie, struct ceph_pagelist *request_pl) +{ + struct ceph_osd_req_op *op; + + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); + op->notify.cookie = cookie; + + ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); + op->indata_len = request_pl->length; } /* @@ -2731,10 +2710,13 @@ static void linger_release(struct kref *kref) WARN_ON(!list_empty(&lreq->pending_lworks)); WARN_ON(lreq->osd); - if (lreq->reg_req) - ceph_osdc_put_request(lreq->reg_req); - if (lreq->ping_req) - ceph_osdc_put_request(lreq->ping_req); + if (lreq->request_pl) + ceph_pagelist_release(lreq->request_pl); + if (lreq->notify_id_pages) + ceph_release_page_vector(lreq->notify_id_pages, 1); + + ceph_osdc_put_request(lreq->reg_req); + ceph_osdc_put_request(lreq->ping_req); target_destroy(&lreq->t); kfree(lreq); } @@ -3003,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); linger_reg_commit_complete(lreq, req->r_result); @@ -3026,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3048,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->last_error); if (req->r_result < 0) { @@ -3057,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } static void send_linger(struct ceph_osd_linger_request *lreq) { - struct ceph_osd_request *req = lreq->reg_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_client *osdc = lreq->osdc; + struct ceph_osd_request *req; + int ret; - verify_osdc_wrlocked(req->r_osdc); + verify_osdc_wrlocked(osdc); + mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->reg_req) { + if (lreq->reg_req->r_osd) + cancel_linger_request(lreq->reg_req); + ceph_osdc_put_request(lreq->reg_req); + } + + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - request_reinit(req); target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; - mutex_lock(&lreq->lock); if (lreq->is_watch && lreq->committed) { - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id); - op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT; - op->watch.gen = ++lreq->register_gen; + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, + lreq->linger_id, ++lreq->register_gen); dout("lreq %p reconnect register_gen %u\n", lreq, - op->watch.gen); + req->r_ops[0].watch.gen); req->r_callback = linger_reconnect_cb; } else { - if (!lreq->is_watch) + if (lreq->is_watch) { + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, + lreq->linger_id, 0); + } else { lreq->notify_id = 0; - else - WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH); + + refcount_inc(&lreq->request_pl->refcnt); + osd_req_op_notify_init(req, 0, lreq->linger_id, + lreq->request_pl); + ceph_osd_data_pages_init( + osd_req_op_data(req, 0, notify, response_data), + lreq->notify_id_pages, PAGE_SIZE, 0, false, false); + } dout("lreq %p register\n", lreq); req->r_callback = linger_commit_cb; } - mutex_unlock(&lreq->lock); + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->reg_req = req; + mutex_unlock(&lreq->lock); submit_request(req, true); } @@ -3106,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->ping_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->ping_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, lreq->last_error); @@ -3121,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req) lreq->register_gen, req->r_ops[0].watch.gen); } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3128,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req) static void send_linger_ping(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; - struct ceph_osd_request *req = lreq->ping_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_request *req; + int ret; if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); @@ -3141,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq) __func__, lreq, lreq->linger_id, lreq->ping_sent, lreq->register_gen); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->ping_req) { + if (lreq->ping_req->r_osd) + cancel_linger_request(lreq->ping_req); + ceph_osdc_put_request(lreq->ping_req); + } - request_reinit(req); - target_copy(&req->r_t, &lreq->t); + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id || - op->watch.op != CEPH_OSD_WATCH_OP_PING); - op->watch.gen = lreq->register_gen; + target_copy(&req->r_t, &lreq->t); + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, + lreq->register_gen); req->r_callback = linger_ping_cb; + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); + req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->ping_req = req; ceph_osdc_get_request(req); account_request(req); @@ -3169,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq) down_write(&osdc->lock); linger_register(lreq); - if (lreq->is_watch) { - lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; - lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; - } else { - lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; - } calc_target(osdc, &lreq->t, false); osd = lookup_create_osd(osdc, lreq->t.osd, true); @@ -3206,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) */ static void __linger_cancel(struct ceph_osd_linger_request *lreq) { - if (lreq->is_watch && lreq->ping_req->r_osd) + if (lreq->ping_req && lreq->ping_req->r_osd) cancel_linger_request(lreq->ping_req); - if (lreq->reg_req->r_osd) + if (lreq->reg_req && lreq->reg_req->r_osd) cancel_linger_request(lreq->reg_req); cancel_linger_map_check(lreq); unlink_linger(lreq->osd, lreq); @@ -4657,43 +4678,6 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) } EXPORT_SYMBOL(ceph_osdc_sync); -static struct ceph_osd_request * -alloc_linger_request(struct ceph_osd_linger_request *lreq) -{ - struct ceph_osd_request *req; - - req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO); - if (!req) - return NULL; - - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - return req; -} - -static struct ceph_osd_request * -alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) -{ - struct ceph_osd_request *req; - - req = alloc_linger_request(lreq); - if (!req) - return NULL; - - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - osd_req_op_watch_init(req, 0, 0, watch_opcode); - - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { - ceph_osdc_put_request(req); - return NULL; - } - - return req; -} - /* * Returns a handle, caller owns a ref. */ @@ -4723,18 +4707,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, lreq->t.flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&lreq->mtime); - lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); - if (!lreq->reg_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - - lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); - if (!lreq->ping_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { @@ -4772,8 +4744,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&req->r_mtime); - osd_req_op_watch_init(req, 0, lreq->linger_id, - CEPH_OSD_WATCH_OP_UNWATCH); + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, + lreq->linger_id, 0); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) @@ -4859,35 +4831,6 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_notify_ack); -static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, - u64 cookie, u32 prot_ver, u32 timeout, - void *payload, u32 payload_len) -{ - struct ceph_osd_req_op *op; - struct ceph_pagelist *pl; - int ret; - - op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); - op->notify.cookie = cookie; - - pl = ceph_pagelist_alloc(GFP_NOIO); - if (!pl) - return -ENOMEM; - - ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ - ret |= ceph_pagelist_encode_32(pl, timeout); - ret |= ceph_pagelist_encode_32(pl, payload_len); - ret |= ceph_pagelist_append(pl, payload, payload_len); - if (ret) { - ceph_pagelist_release(pl); - return -ENOMEM; - } - - ceph_osd_data_pagelist_init(&op->notify.request_data, pl); - op->indata_len = pl->length; - return 0; -} - /* * @timeout: in seconds * @@ -4906,7 +4849,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, size_t *preply_len) { struct ceph_osd_linger_request *lreq; - struct page **pages; int ret; WARN_ON(!timeout); @@ -4919,41 +4861,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, if (!lreq) return -ENOMEM; - lreq->preply_pages = preply_pages; - lreq->preply_len = preply_len; - - ceph_oid_copy(&lreq->t.base_oid, oid); - ceph_oloc_copy(&lreq->t.base_oloc, oloc); - lreq->t.flags = CEPH_OSD_FLAG_READ; - - lreq->reg_req = alloc_linger_request(lreq); - if (!lreq->reg_req) { + lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); + if (!lreq->request_pl) { ret = -ENOMEM; goto out_put_lreq; } - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, - payload, payload_len); - if (ret) + ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ + ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); + ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); + ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); + if (ret) { + ret = -ENOMEM; goto out_put_lreq; + } /* for notify_id */ - pages = ceph_alloc_page_vector(1, GFP_NOIO); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); + lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); + if (IS_ERR(lreq->notify_id_pages)) { + ret = PTR_ERR(lreq->notify_id_pages); + lreq->notify_id_pages = NULL; goto out_put_lreq; } - ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, - response_data), - pages, PAGE_SIZE, 0, false, true); - ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); - if (ret) - goto out_put_lreq; + lreq->preply_pages = preply_pages; + lreq->preply_len = preply_len; + + ceph_oid_copy(&lreq->t.base_oid, oid); + ceph_oloc_copy(&lreq->t.base_oloc, oloc); + lreq->t.flags = CEPH_OSD_FLAG_READ; linger_submit(lreq); ret = linger_reg_commit_wait(lreq); From d0bb883c6355bcb2cc149fb4d5c3b28ccd327a5e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 May 2022 17:17:54 +0200 Subject: [PATCH 456/491] libceph: fix misleading ceph_osdc_cancel_request() comment cancel_request() never guaranteed that after its return the OSD client would be completely done with the OSD request. The callback (if specified) can still be invoked and a ref can still be held. Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li --- net/ceph/osd_client.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4b88f2a4a6e24..9d82bb42e958f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4591,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, EXPORT_SYMBOL(ceph_osdc_start_request); /* - * Unregister a registered request. The request is not completed: - * ->r_result isn't set and __complete_request() isn't called. + * Unregister request. If @req was registered, it isn't completed: + * r_result isn't set and __complete_request() isn't invoked. + * + * If @req wasn't registered, this call may have raced with + * handle_reply(), in which case r_result would already be set and + * __complete_request() would be getting invoked, possibly even + * concurrently with this call. */ void ceph_osdc_cancel_request(struct ceph_osd_request *req) { From 7123d39dc24dcd21ff23d75f46f926b15269b9da Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 17 May 2022 12:00:37 -0500 Subject: [PATCH 457/491] drm/amd: Don't reset dGPUs if the system is going to s2idle An A+A configuration on ASUS ROG Strix G513QY proves that the ASIC reset for handling aborted suspend can't work with s2idle. This functionality was introduced in commit daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)"). A few other commits have gone on top of the ASIC reset, but this still doesn't work on the A+A configuration in s2idle. Avoid doing the reset on dGPUs specifically when using s2idle. Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2008 Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 14 ++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cdf0818088b3d..7606e3b6361ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1342,9 +1342,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } +static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 0e12315fa0cb8..98ac53ee6bb55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1045,6 +1045,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) (pm_suspend_target_state == PM_SUSPEND_MEM); } +/** + * amdgpu_acpi_should_gpu_reset + * + * @adev: amdgpu_device_pointer + * + * returns true if should reset GPU, false if not + */ +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; +} + /** * amdgpu_acpi_is_s0ix_active * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7fd0277b28051..46ef57b07c151 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2336,7 +2336,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - if (!adev->in_s0ix) + if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); return 0; From e4920d42ce0e9c8aafb7f64b6d9d4ae02161e51e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 18 May 2022 14:28:32 -0700 Subject: [PATCH 458/491] Input: ili210x - fix reset timing According to Ilitek "231x & ILI251x Programming Guide" Version: 2.30 "2.1. Power Sequence", "T4 Chip Reset and discharge time" is minimum 10ms and "T2 Chip initial time" is maximum 150ms. Adjust the reset timings such that T4 is 12ms and T2 is 160ms to fit those figures. This prevents sporadic touch controller start up failures when some systems with at least ILI251x controller boot, without this patch the systems sometimes fail to communicate with the touch controller. Fixes: 201f3c803544c ("Input: ili210x - add reset GPIO support") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220518204901.93534-1-marex@denx.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 2bd407d86bae5..3a48262fb3d35 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -951,9 +951,9 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(50, 100); + usleep_range(12000, 15000); gpiod_set_value_cansleep(reset_gpio, 0); - msleep(100); + msleep(160); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); From b26ff9137183309c18cdfe931e1cafcf3c1a980d Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 18 May 2022 14:30:09 -0700 Subject: [PATCH 459/491] Input: ili210x - use one common reset implementation Rename ili251x_hardware_reset() to ili210x_hardware_reset(), change its parameter from struct device * to struct gpio_desc *, and use it as one single consistent reset implementation all over the driver. Also increase the minimum reset duration to 12ms, to make sure the reset is really within the spec. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220518210423.106555-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 3a48262fb3d35..e9bd36adbe47d 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -756,15 +756,12 @@ static int ili251x_firmware_reset(struct i2c_client *client) return ili251x_firmware_busy(client); } -static void ili251x_hardware_reset(struct device *dev) +static void ili210x_hardware_reset(struct gpio_desc *reset_gpio) { - struct i2c_client *client = to_i2c_client(dev); - struct ili210x *priv = i2c_get_clientdata(client); - /* Reset the controller */ - gpiod_set_value_cansleep(priv->reset_gpio, 1); - usleep_range(10000, 15000); - gpiod_set_value_cansleep(priv->reset_gpio, 0); + gpiod_set_value_cansleep(reset_gpio, 1); + usleep_range(12000, 15000); + gpiod_set_value_cansleep(reset_gpio, 0); msleep(300); } @@ -773,6 +770,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev, const char *buf, size_t count) { struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); const char *fwname = ILI251X_FW_FILENAME; const struct firmware *fw; u16 ac_end, df_end; @@ -803,7 +801,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev, dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname); - ili251x_hardware_reset(dev); + ili210x_hardware_reset(priv->reset_gpio); error = ili251x_firmware_reset(client); if (error) @@ -858,7 +856,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev, error = count; exit: - ili251x_hardware_reset(dev); + ili210x_hardware_reset(priv->reset_gpio); dev_dbg(dev, "Firmware update ended, error=%i\n", error); enable_irq(client->irq); kfree(fwbuf); @@ -951,9 +949,7 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(12000, 15000); - gpiod_set_value_cansleep(reset_gpio, 0); - msleep(160); + ili210x_hardware_reset(reset_gpio); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); From 090f9dd092c6c2e9e4b9d0472b8d8628e4b851cb Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Wed, 18 May 2022 17:16:30 +0200 Subject: [PATCH 460/491] selftests: forwarding: fix missing backslash Fix missing backslash, introduced in f62c5acc800ee. Causes all tests to not be installed. Fixes: f62c5acc800e ("selftests/net/forwarding: add missing tests to Makefile") Signed-off-by: Joachim Wiberg Acked-by: Hangbin Liu Link: https://lore.kernel.org/r/20220518151630.2747773-1-troglobit@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index c87e674b61b1d..e811090f77483 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -86,7 +86,7 @@ TEST_PROGS = bridge_igmp.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ vxlan_bridge_1q_ipv6.sh \ - vxlan_bridge_1q_port_8472_ipv6.sh + vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ vxlan_symmetric_ipv6.sh \ From c2239294188fdbc3c58784a08d90edacd177bf9a Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Tue, 17 May 2022 14:46:00 -0700 Subject: [PATCH 461/491] ptp: ocp: change sysfs attr group handling In the detach path, the driver calls sysfs_remove_group() for the groups it believes has been registered. However, if the group was never previously registered, then this causes a splat. Instead, compute the groups that should be registered in advance, and then call sysfs_create_groups(), which registers them all at once. Update the error handling appropriately. Fixes: c205d53c4923 ("ptp: ocp: Add firmware capability bits for feature gating") Reported-by: Zheyu Ma Signed-off-by: Jonathan Lemon Link: https://lore.kernel.org/r/20220517214600.10606-1-jonathan.lemon@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 36c0e188216b1..860672d6a03c0 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -300,7 +300,7 @@ struct ptp_ocp { struct platform_device *spi_flash; struct clk_hw *i2c_clk; struct timer_list watchdog; - const struct ocp_attr_group *attr_tbl; + const struct attribute_group **attr_group; const struct ptp_ocp_eeprom_map *eeprom_map; struct dentry *debug_root; time64_t gnss_lost; @@ -1836,6 +1836,42 @@ ptp_ocp_signal_init(struct ptp_ocp *bp) bp->signal_out[i]->mem); } +static void +ptp_ocp_attr_group_del(struct ptp_ocp *bp) +{ + sysfs_remove_groups(&bp->dev.kobj, bp->attr_group); + kfree(bp->attr_group); +} + +static int +ptp_ocp_attr_group_add(struct ptp_ocp *bp, + const struct ocp_attr_group *attr_tbl) +{ + int count, i; + int err; + + count = 0; + for (i = 0; attr_tbl[i].cap; i++) + if (attr_tbl[i].cap & bp->fw_cap) + count++; + + bp->attr_group = kcalloc(count + 1, sizeof(struct attribute_group *), + GFP_KERNEL); + if (!bp->attr_group) + return -ENOMEM; + + count = 0; + for (i = 0; attr_tbl[i].cap; i++) + if (attr_tbl[i].cap & bp->fw_cap) + bp->attr_group[count++] = attr_tbl[i].group; + + err = sysfs_create_groups(&bp->dev.kobj, bp->attr_group); + if (err) + bp->attr_group[0] = NULL; + + return err; +} + static void ptp_ocp_sma_init(struct ptp_ocp *bp) { @@ -1905,7 +1941,6 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->flash_start = 1024 * 4096; bp->eeprom_map = fb_eeprom_map; bp->fw_version = ioread32(&bp->image->version); - bp->attr_tbl = fb_timecard_groups; bp->fw_cap = OCP_CAP_BASIC; ver = bp->fw_version & 0xffff; @@ -1919,6 +1954,10 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) ptp_ocp_sma_init(bp); ptp_ocp_signal_init(bp); + err = ptp_ocp_attr_group_add(bp, fb_timecard_groups); + if (err) + return err; + err = ptp_ocp_fb_set_pins(bp); if (err) return err; @@ -3389,7 +3428,6 @@ ptp_ocp_complete(struct ptp_ocp *bp) { struct pps_device *pps; char buf[32]; - int i, err; if (bp->gnss_port != -1) { sprintf(buf, "ttyS%d", bp->gnss_port); @@ -3414,14 +3452,6 @@ ptp_ocp_complete(struct ptp_ocp *bp) if (pps) ptp_ocp_symlink(bp, pps->dev, "pps"); - for (i = 0; bp->attr_tbl[i].cap; i++) { - if (!(bp->attr_tbl[i].cap & bp->fw_cap)) - continue; - err = sysfs_create_group(&bp->dev.kobj, bp->attr_tbl[i].group); - if (err) - return err; - } - ptp_ocp_debugfs_add_device(bp); return 0; @@ -3493,15 +3523,11 @@ static void ptp_ocp_detach_sysfs(struct ptp_ocp *bp) { struct device *dev = &bp->dev; - int i; sysfs_remove_link(&dev->kobj, "ttyGNSS"); sysfs_remove_link(&dev->kobj, "ttyMAC"); sysfs_remove_link(&dev->kobj, "ptp"); sysfs_remove_link(&dev->kobj, "pps"); - if (bp->attr_tbl) - for (i = 0; bp->attr_tbl[i].cap; i++) - sysfs_remove_group(&dev->kobj, bp->attr_tbl[i].group); } static void @@ -3511,6 +3537,7 @@ ptp_ocp_detach(struct ptp_ocp *bp) ptp_ocp_debugfs_remove_device(bp); ptp_ocp_detach_sysfs(bp); + ptp_ocp_attr_group_del(bp); if (timer_pending(&bp->watchdog)) del_timer_sync(&bp->watchdog); if (bp->ts0) From fbb3abdf2223cd0dfc07de85fe5a43ba7f435bdf Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 May 2022 02:58:40 +0200 Subject: [PATCH 462/491] net: bridge: Clear offload_fwd_mark when passing frame up bridge interface. It is possible to stack bridges on top of each other. Consider the following which makes use of an Ethernet switch: br1 / \ / \ / \ br0.11 wlan0 | br0 / | \ p1 p2 p3 br0 is offloaded to the switch. Above br0 is a vlan interface, for vlan 11. This vlan interface is then a slave of br1. br1 also has a wireless interface as a slave. This setup trunks wireless lan traffic over the copper network inside a VLAN. A frame received on p1 which is passed up to the bridge has the skb->offload_fwd_mark flag set to true, indicating that the switch has dealt with forwarding the frame out ports p2 and p3 as needed. This flag instructs the software bridge it does not need to pass the frame back down again. However, the flag is not getting reset when the frame is passed upwards. As a result br1 sees the flag, wrongly interprets it, and fails to forward the frame to wlan0. When passing a frame upwards, clear the flag. This is the Rx equivalent of br_switchdev_frame_unmark() in br_dev_xmit(). Fixes: f1c2eddf4cb6 ("bridge: switchdev: Use an helper to clear forward mark") Signed-off-by: Andrew Lunn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20220518005840.771575-1-andrew@lunn.ch Signed-off-by: Paolo Abeni --- net/bridge/br_input.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 196417859c4a9..68b3e850bcb9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb) dev_sw_netstats_rx_add(brdev, skb->len); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc mode when someone * may be running packet capture. From 7b1d6924f27ba24b9e47abb9bd53d0bbc430a835 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 May 2022 13:52:19 +0200 Subject: [PATCH 463/491] drm/i915: Use i915_gem_object_ggtt_pin_ww for reloc_iomap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When removing short term pins, I've changed the the batch buffer pinning for relocation to use __i915_vma_pin, because i915_gem_object_ggtt_pin_ww was destroying the old vma. This caused regressions, because the functions are not identical. Fix the regressions by calling i915_gem_object_ggtt_pin_ww() again on ggtt-only platforms, but only if the batch can be pinned without being moved. Fixes: b5cfe6f7a6e1 ("drm/i915: Remove short-term pins from execbuf, v6.") Cc: Matthew Auld Reported-by: Mateusz Jończyk Tested-by: Hans de Goede Signed-off-by: Maarten Lankhorst Acked-by: Matthew Auld Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5806 Link: https://patchwork.freedesktop.org/patch/msgid/20220511115219.46507-1-maarten.lankhorst@linux.intel.com (cherry picked from commit 451374eef622fca6f00eeeda89aaccb45a30a149) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d42f437149c95..6ca8929cf6e12 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1252,14 +1252,12 @@ static void *reloc_iomap(struct i915_vma *batch, * Only attempt to pin the batch buffer to ggtt if the current batch * is not inside ggtt, or the batch buffer is not misplaced. */ - if (!i915_is_ggtt(batch->vm)) { + if (!i915_is_ggtt(batch->vm) || + !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) { vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK /* NOWARN */ | PIN_NOEVICT); - } else if (i915_vma_is_map_and_fenceable(batch)) { - __i915_vma_pin(batch); - vma = batch; } if (vma == ERR_PTR(-EDEADLK)) From e949dee3625e1b0ef2e40d9aa09c2995281b12f6 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 17 May 2022 12:10:46 +0200 Subject: [PATCH 464/491] mmc: core: Fix busy polling for MMC_SEND_OP_COND again It turned out that polling period for MMC_SEND_OP_COND, that currently is set to 1ms, still isn't sufficient. In particular a Micron eMMC on a Beaglebone platform, is reported to sometimes fail to initialize. Additional test, shows that extending the period to 4ms is working fine, so let's make that change. Reported-by: Jean Rene Dawin Tested-by: Jean Rene Dawin Fixes: 1760fdb6fe9f (mmc: core: Restore (almost) the busy polling for MMC_SEND_OP_COND") Fixes: 76bfc7ccc2fa ("mmc: core: adjust polling interval for CMD1") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20220517101046.27512-1-ulf.hansson@linaro.org --- drivers/mmc/core/mmc_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 180d7e9d3400a..81c55bfd6e0c2 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -21,7 +21,7 @@ #define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */ #define MMC_SANITIZE_TIMEOUT_MS (240 * 1000) /* 240s */ -#define MMC_OP_COND_PERIOD_US (1 * 1000) /* 1ms */ +#define MMC_OP_COND_PERIOD_US (4 * 1000) /* 4ms */ #define MMC_OP_COND_TIMEOUT_MS 1000 /* 1s */ static const u8 tuning_blk_pattern_4bit[] = { From b17410182b6f98191fbf7f42d3b4a78512769d29 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 21:38:56 +0200 Subject: [PATCH 465/491] riscv: dts: sifive: fu540-c000: align dma node name with dtschema Fixes dtbs_check warnings like: dma@3000000: $nodename:0: 'dma@3000000' does not match '^dma-controller(@.*)?$' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220407193856.18223-1-krzysztof.kozlowski@linaro.org Fixes: c5ab54e9945b ("riscv: dts: add support for PDMA device of HiFive Unleashed Rev A00") Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index aad45d7f498fd..5c638fd5b35c7 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -167,7 +167,7 @@ clocks = <&prci FU540_PRCI_CLK_TLCLK>; status = "disabled"; }; - dma: dma@3000000 { + dma: dma-controller@3000000 { compatible = "sifive,fu540-c000-pdma"; reg = <0x0 0x3000000 0x0 0x8000>; interrupt-parent = <&plic0>; From c932edeaf6d6e6cc25088e61c3fcf585c30497c0 Mon Sep 17 00:00:00 2001 From: Conor Paxton Date: Tue, 17 May 2022 11:40:58 +0100 Subject: [PATCH 466/491] riscv: dts: microchip: fix gpio1 reg property typo Fix reg address typo in the gpio1 stanza. Signed-off-by: Conor Paxton Reviewed-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Link: https://lore.kernel.org/r/20220517104058.2004734-1-conor.paxton@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index c5c9d1360de07..f80aeb2cfd59c 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -366,7 +366,7 @@ gpio1: gpio@20121000 { compatible = "microchip,mpfs-gpio"; - reg = <000 0x20121000 0x0 0x1000>; + reg = <0x0 0x20121000 0x0 0x1000>; interrupt-parent = <&plic>; interrupt-controller; #interrupt-cells = <1>; From d5d92b64408443e113b9742f8f1c35278910dd4d Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Thu, 19 May 2022 15:05:29 +0900 Subject: [PATCH 467/491] scsi: ufs: core: Fix referencing invalid rsp field Fix referencing sense data when it is invalid. When the length of the data segment is 0, there is no valid information in the rsp field, so ufshpb_rsp_upiu() is returned without additional operation. Link: https://lore.kernel.org/r/252651381.41652940482659.JavaMail.epsvc@epcpadp4 Fixes: 4b5f49079c52 ("scsi: ufs: ufshpb: L2P map management for HPB read") Acked-by: Avri Altman Signed-off-by: Daejun Park Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshpb.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 81099b68bbfbd..588c0329b80ca 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -1254,6 +1254,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr; int data_seg_len; + data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2) + & MASK_RSP_UPIU_DATA_SEG_LEN; + + /* If data segment length is zero, rsp_field is not valid */ + if (!data_seg_len) + return; + if (unlikely(lrbp->lun != rsp_field->lun)) { struct scsi_device *sdev; bool found = false; @@ -1288,18 +1295,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return; } - data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2) - & MASK_RSP_UPIU_DATA_SEG_LEN; - - /* To flush remained rsp_list, we queue the map_work task */ - if (!data_seg_len) { - if (!ufshpb_is_general_lun(hpb->lun)) - return; - - ufshpb_kick_map_work(hpb); - return; - } - BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE); if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field)) From 4ac19ead0dfbabd8e0bfc731f507cfb0b95d6c99 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:36 +0000 Subject: [PATCH 468/491] kvm: x86/pmu: Fix the compare function used by the pmu event filter When returning from the compare function the u64 is truncated to an int. This results in a loss of the high nybble[1] in the event select and its sign if that nybble is in use. Switch from using a result that can end up being truncated to a result that can only be: 1, 0, -1. [1] bits 35:32 in the event select register and bits 11:8 in the event select. Fixes: 7ff775aca48ad ("KVM: x86/pmu: Use binary search to check filtered events") Signed-off-by: Aaron Lewis Reviewed-by: Sean Christopherson Message-Id: <20220517051238.2566934-1-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index eca39f56c2315..0604bc29f0b8c 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) return true; } -static int cmp_u64(const void *a, const void *b) +static int cmp_u64(const void *pa, const void *pb) { - return *(__u64 *)a - *(__u64 *)b; + u64 a = *(u64 *)pa; + u64 b = *(u64 *)pb; + + return (a > b) - (a < b); } void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) From 04baa2233d55e85e0f0f5dfe0401ecb027da9a0e Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:37 +0000 Subject: [PATCH 469/491] selftests: kvm/x86: Add the helper function create_pmu_event_filter Add a helper function that creates a pmu event filter given an event list. Currently, a pmu event filter can only be created with the same hard coded event list. Add a way to create one given a different event list. Also, rename make_pmu_event_filter to alloc_pmu_event_filter to clarify it's purpose given the introduction of create_pmu_event_filter. No functional changes intended. Signed-off-by: Aaron Lewis Message-Id: <20220517051238.2566934-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- .../kvm/x86_64/pmu_event_filter_test.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 0d06ffa95d9d4..30c1a5804210c 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm) return success; } -static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) +static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) { struct kvm_pmu_event_filter *f; int size = sizeof(*f) + nevents * sizeof(f->events[0]); @@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) return f; } -static struct kvm_pmu_event_filter *event_filter(uint32_t action) + +static struct kvm_pmu_event_filter * +create_pmu_event_filter(const uint64_t event_list[], + int nevents, uint32_t action) { struct kvm_pmu_event_filter *f; int i; - f = make_pmu_event_filter(ARRAY_SIZE(event_list)); + f = alloc_pmu_event_filter(nevents); f->action = action; - for (i = 0; i < ARRAY_SIZE(event_list); i++) + for (i = 0; i < nevents; i++) f->events[i] = event_list[i]; return f; } +static struct kvm_pmu_event_filter *event_filter(uint32_t action) +{ + return create_pmu_event_filter(event_list, + ARRAY_SIZE(event_list), + action); +} + /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. From c41ef29cc1d4fa4ac5f1bb8e6ab57bf6f02cf878 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:38 +0000 Subject: [PATCH 470/491] selftests: kvm/x86: Verify the pmu event filter matches the correct event Add a test to demonstrate that when the guest programs an event select it is matched correctly in the pmu event filter and not inadvertently filtered. This could happen on AMD if the high nybble[1] in the event select gets truncated away only leaving the bottom byte[2] left for matching. This is a contrived example used for the convenience of demonstrating this issue, however, this can be applied to event selects 0x28A (OC Mode Switch) and 0x08A (L1 BTB Correction), where 0x08A could end up being denied when the event select was only set up to deny 0x28A. [1] bits 35:32 in the event select register and bits 11:8 in the event select. [2] bits 7:0 in the event select register and bits 7:0 in the event select. Signed-off-by: Aaron Lewis Message-Id: <20220517051238.2566934-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- .../kvm/x86_64/pmu_event_filter_test.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 30c1a5804210c..93d77574b255d 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -281,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm, return run_vm_to_sync(vm); } +static void test_amd_deny_list(struct kvm_vm *vm) +{ + uint64_t event = EVENT(0x1C2, 0); + struct kvm_pmu_event_filter *f; + uint64_t count; + + f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); + count = test_with_filter(vm, f); + + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + static void test_member_deny_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); @@ -463,6 +479,9 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } + if (use_amd_pmu()) + test_amd_deny_list(vm); + test_without_filter(vm); test_member_deny_list(vm); test_member_allow_list(vm); From 0ae065a5d265bc5ada13e350015458e0c5e5c351 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 May 2022 21:25:12 -0300 Subject: [PATCH 471/491] perf build: Fix check for btf__load_from_kernel_by_id() in libbpf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avi Kivity reported a problem where the __weak btf__load_from_kernel_by_id() in tools/perf/util/bpf-event.c was being used and it called btf__get_from_id() in tools/lib/bpf/btf.c that in turn called back to btf__load_from_kernel_by_id(), resulting in an endless loop. Fix this by adding a feature test to check if btf__load_from_kernel_by_id() is available when building perf with LIBBPF_DYNAMIC=1, and if not then provide the fallback to the old btf__get_from_id(), that doesn't call back to btf__load_from_kernel_by_id() since at that time it didn't exist at all. Tested on Fedora 35 where we have libbpf-devel 0.4.0 with LIBBPF_DYNAMIC where we don't have btf__load_from_kernel_by_id() and thus its feature test fail, not defining HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID: $ cat /tmp/build/perf-urgent/feature/test-libbpf-btf__load_from_kernel_by_id.make.output test-libbpf-btf__load_from_kernel_by_id.c: In function ‘main’: test-libbpf-btf__load_from_kernel_by_id.c:6:16: error: implicit declaration of function ‘btf__load_from_kernel_by_id’ [-Werror=implicit-function-declaration] 6 | return btf__load_from_kernel_by_id(20151128, NULL); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors $ $ nm /tmp/build/perf-urgent/perf | grep btf__load_from_kernel_by_id 00000000005ba180 T btf__load_from_kernel_by_id $ $ objdump --disassemble=btf__load_from_kernel_by_id -S /tmp/build/perf-urgent/perf /tmp/build/perf-urgent/perf: file format elf64-x86-64 00000000005ba180 : #include "record.h" #include "util/synthetic-events.h" #ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID struct btf *btf__load_from_kernel_by_id(__u32 id) { 5ba180: 55 push %rbp 5ba181: 48 89 e5 mov %rsp,%rbp 5ba184: 48 83 ec 10 sub $0x10,%rsp 5ba188: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 5ba18f: 00 00 5ba191: 48 89 45 f8 mov %rax,-0x8(%rbp) 5ba195: 31 c0 xor %eax,%eax struct btf *btf; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" int err = btf__get_from_id(id, &btf); 5ba197: 48 8d 75 f0 lea -0x10(%rbp),%rsi 5ba19b: e8 a0 57 e5 ff call 40f940 5ba1a0: 89 c2 mov %eax,%edx #pragma GCC diagnostic pop return err ? ERR_PTR(err) : btf; 5ba1a2: 48 98 cltq 5ba1a4: 85 d2 test %edx,%edx 5ba1a6: 48 0f 44 45 f0 cmove -0x10(%rbp),%rax } Fixes: 218e7b775d368f38 ("perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions") Reported-by: Avi Kivity Link: https://lore.kernel.org/linux-perf-users/f0add43b-3de5-20c5-22c4-70aff4af959f@scylladb.com Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/linux-perf-users/YobjjFOblY4Xvwo7@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ .../feature/test-libbpf-btf__load_from_kernel_by_id.c | 7 +++++++ tools/perf/Makefile.config | 7 +++++++ tools/perf/util/bpf-event.c | 4 +++- 5 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ae61f464043a1..c6a48d0ef9ff0 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA := \ llvm-version \ clang \ libbpf \ + libbpf-btf__load_from_kernel_by_id \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index de66e1cc07348..cb4a2a4fa2e48 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -57,6 +57,7 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-libbpf.bin \ + test-libbpf-btf__load_from_kernel_by_id.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -287,6 +288,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-libbpf.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c new file mode 100644 index 0000000000000..f7c084428735a --- /dev/null +++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + return btf__load_from_kernel_by_id(20151128, NULL); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f3bf9297bcc03..1bd64e7404b9f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -553,9 +553,16 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf $(call detected,CONFIG_LIBBPF_DYNAMIC) + + $(call feature_check,libbpf-btf__load_from_kernel_by_id) + ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif + else + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID endif endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 94624733af7e2..8271ab764eb56 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -22,7 +22,8 @@ #include "record.h" #include "util/synthetic-events.h" -struct btf * __weak btf__load_from_kernel_by_id(__u32 id) +#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID +struct btf *btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; #pragma GCC diagnostic push @@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? ERR_PTR(err) : btf; } +#endif int __weak bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name __maybe_unused, From e332b55fe79cca72451fe0b797219bd9fe6b9434 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 19 May 2022 01:49:13 -0700 Subject: [PATCH 472/491] KVM: eventfd: Fix false positive RCU usage warning The splat below can be seen when running kvm-unit-test: ============================= WARNING: suspicious RCU usage 5.18.0-rc7 #5 Tainted: G IOE ----------------------------- /home/kernel/linux/arch/x86/kvm/../../../virt/kvm/eventfd.c:80 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by qemu-system-x86/35124: #0: ffff9725391d80b8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x710 [kvm] #1: ffffbd25cfb2a0b8 (&kvm->srcu){....}-{0:0}, at: vcpu_enter_guest+0xdeb/0x1900 [kvm] #2: ffffbd25cfb2b920 (&kvm->irq_srcu){....}-{0:0}, at: kvm_hv_notify_acked_sint+0x79/0x1e0 [kvm] #3: ffffbd25cfb2b920 (&kvm->irq_srcu){....}-{0:0}, at: irqfd_resampler_ack+0x5/0x110 [kvm] stack backtrace: CPU: 2 PID: 35124 Comm: qemu-system-x86 Tainted: G IOE 5.18.0-rc7 #5 Call Trace: dump_stack_lvl+0x6c/0x9b irqfd_resampler_ack+0xfd/0x110 [kvm] kvm_notify_acked_gsi+0x32/0x90 [kvm] kvm_hv_notify_acked_sint+0xc5/0x1e0 [kvm] kvm_hv_set_msr_common+0xec1/0x1160 [kvm] kvm_set_msr_common+0x7c3/0xf60 [kvm] vmx_set_msr+0x394/0x1240 [kvm_intel] kvm_set_msr_ignored_check+0x86/0x200 [kvm] kvm_emulate_wrmsr+0x4f/0x1f0 [kvm] vmx_handle_exit+0x6fb/0x7e0 [kvm_intel] vcpu_enter_guest+0xe5a/0x1900 [kvm] kvm_arch_vcpu_ioctl_run+0x16e/0xac0 [kvm] kvm_vcpu_ioctl+0x279/0x710 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae resampler-list is protected by irq_srcu (see kvm_irqfd_assign), so fix the false positive by using list_for_each_entry_srcu(). Signed-off-by: Wanpeng Li Message-Id: <1652950153-12489-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 59b1dd4a549ee..2a3ed401ce465 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -77,7 +77,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) idx = srcu_read_lock(&kvm->irq_srcu); - list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) + list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, + srcu_read_lock_held(&kvm->irq_srcu)) eventfd_signal(irqfd->resamplefd, 1); srcu_read_unlock(&kvm->irq_srcu, idx); From 92d579ea3279aa87392b862df5810f0a7e30fcc6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 18 May 2022 20:20:01 -0700 Subject: [PATCH 473/491] perf stat: Fix and validate CPU map inputs in synthetic PERF_RECORD_STAT events Stat events can come from disk and so need a degree of validation. They contain a CPU which needs looking up via CPU map to access a counter. Add the CPU to index translation, alongside validity checking. Discussion thread: https://lore.kernel.org/linux-perf-users/CAP-5=fWQR=sCuiSMktvUtcbOLidEpUJLCybVF6=BRvORcDOq+g@mail.gmail.com/ Fixes: 7ac0089d138f80dc ("perf evsel: Pass cpu not cpu map index to synthesize") Reported-by: Michael Petlan Suggested-by: Michael Petlan Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Dave Marchevsky Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: John Fastabend Cc: Kan Liang Cc: KP Singh Cc: Lv Ruyi Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Michael Petlan Cc: Namhyung Kim Cc: netdev@vger.kernel.org Cc: Peter Zijlstra Cc: Quentin Monnet Cc: Song Liu Cc: Stephane Eranian Cc: Xing Zhengjun Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20220519032005.1273691-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 817a2de264b46..c1af37e11f989 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -472,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { - struct perf_counts_values count; + struct perf_counts_values count, *ptr; struct perf_record_stat *st = &event->stat; struct evsel *counter; + int cpu_map_idx; count.val = st->val; count.ena = st->ena; @@ -485,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session, pr_err("Failed to resolve counter for stat event.\n"); return -EINVAL; } - - *perf_counts(counter->counts, st->cpu, st->thread) = count; + cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu}); + if (cpu_map_idx == -1) { + pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter)); + return -EINVAL; + } + ptr = perf_counts(counter->counts, cpu_map_idx, st->thread); + if (ptr == NULL) { + pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n", + st->cpu, st->thread, evsel__name(counter)); + return -EINVAL; + } + *ptr = count; counter->supported = true; return 0; } From 9bf3ac466faa83d51a8fe9212131701e58fdef74 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 11 May 2022 10:15:04 +0800 Subject: [PATCH 474/491] gpio: gpio-vf610: do not touch other bits when set the target bit For gpio controller contain register PDDR, when set one target bit, current logic will clear all other bits, this is wrong. Use operator '|=' to fix it. Fixes: 659d8a62311f ("gpio: vf610: add imx7ulp support") Reviewed-by: Peng Fan Signed-off-by: Haibo Chen Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-vf610.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 20780c35da1b4..23cddb265a0dc 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, { struct vf610_gpio_port *port = gpiochip_get_data(chip); unsigned long mask = BIT(gpio); + u32 val; - if (port->sdata && port->sdata->have_paddr) - vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR); + if (port->sdata && port->sdata->have_paddr) { + val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); + val |= mask; + vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); + } vf610_gpio_set(chip, gpio, value); From 3ecb10175b1f776f076553c24e2689e42953fef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 11 May 2022 09:58:56 +0200 Subject: [PATCH 475/491] gpio: mvebu/pwm: Refuse requests with inverted polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver doesn't take struct pwm_state::polarity into account when configuring the hardware, so refuse requests for inverted polarity. Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mvebu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index a2c8dd329b31b..2db19cd640a43 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long flags; unsigned int on, off; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle; do_div(val, NSEC_PER_SEC); if (val > UINT_MAX + 1ULL) From c87661f855c3f2023e40ddc364002601ee234367 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 May 2022 00:38:42 +0000 Subject: [PATCH 476/491] KVM: Free new dirty bitmap if creating a new memslot fails Fix a goof in kvm_prepare_memory_region() where KVM fails to free the new memslot's dirty bitmap during a CREATE action if kvm_arch_prepare_memory_region() fails. The logic is supposed to detect if the bitmap was allocated and thus needs to be freed, versus if the bitmap was inherited from the old memslot and thus needs to be kept. If there is no old memslot, then obviously the bitmap can't have been inherited The bug was exposed by commit 86931ff7207b ("KVM: x86/mmu: Do not create SPTEs for GFNs that exceed host.MAXPHYADDR"), which made it trivally easy for syzkaller to trigger failure during kvm_arch_prepare_memory_region(), but the bug can be hit other ways too, e.g. due to -ENOMEM when allocating x86's memslot metadata. The backtrace from kmemleak: __vmalloc_node_range+0xb40/0xbd0 mm/vmalloc.c:3195 __vmalloc_node mm/vmalloc.c:3232 [inline] __vmalloc+0x49/0x50 mm/vmalloc.c:3246 __vmalloc_array mm/util.c:671 [inline] __vcalloc+0x49/0x70 mm/util.c:694 kvm_alloc_dirty_bitmap virt/kvm/kvm_main.c:1319 kvm_prepare_memory_region virt/kvm/kvm_main.c:1551 kvm_set_memslot+0x1bd/0x690 virt/kvm/kvm_main.c:1782 __kvm_set_memory_region+0x689/0x750 virt/kvm/kvm_main.c:1949 kvm_set_memory_region virt/kvm/kvm_main.c:1962 kvm_vm_ioctl_set_memory_region virt/kvm/kvm_main.c:1974 kvm_vm_ioctl+0x377/0x13a0 virt/kvm/kvm_main.c:4528 vfs_ioctl fs/ioctl.c:51 __do_sys_ioctl fs/ioctl.c:870 __se_sys_ioctl fs/ioctl.c:856 __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae And the relevant sequence of KVM events: ioctl(3, KVM_CREATE_VM, 0) = 4 ioctl(4, KVM_SET_USER_MEMORY_REGION, {slot=0, flags=KVM_MEM_LOG_DIRTY_PAGES, guest_phys_addr=0x10000000000000, memory_size=4096, userspace_addr=0x20fe8000} ) = -1 EINVAL (Invalid argument) Fixes: 244893fa2859 ("KVM: Dynamically allocate "new" memslots from the get-go") Cc: stable@vger.kernel.org Reported-by: syzbot+8606b8a9cc97a63f1c87@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220518003842.1341782-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6d971fb1b08d8..5ab12214e18dd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1560,7 +1560,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm, r = kvm_arch_prepare_memory_region(kvm, old, new, change); /* Free the bitmap on failure if it was allocated above. */ - if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap) + if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) kvm_destroy_dirty_bitmap(new); return r; From ea8c66fe8d8f4f93df941e52120a3512d7bf5128 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 19 May 2022 10:15:04 -0700 Subject: [PATCH 477/491] KVM: x86: hyper-v: fix type of valid_bank_mask In kvm_hv_flush_tlb(), valid_bank_mask is declared as unsigned long, but is used as u64, which is wrong for i386, and has been spotted by LKP after applying "KVM: x86: hyper-v: replace bitmap_weight() with hweight64()" https://lore.kernel.org/lkml/20220510154750.212913-12-yury.norov@gmail.com/ But it's wrong even without that patch because now bitmap_weight() dereferences a word after valid_bank_mask on i386. >> include/asm-generic/bitops/const_hweight.h:21:76: warning: right shift count >= width of type +[-Wshift-count-overflow] 21 | #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) | ^~ include/asm-generic/bitops/const_hweight.h:10:16: note: in definition of macro '__const_hweight8' 10 | ((!!((w) & (1ULL << 0))) + \ | ^ include/asm-generic/bitops/const_hweight.h:20:31: note: in expansion of macro '__const_hweight16' 20 | #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) | ^~~~~~~~~~~~~~~~~ include/asm-generic/bitops/const_hweight.h:21:54: note: in expansion of macro '__const_hweight32' 21 | #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) | ^~~~~~~~~~~~~~~~~ include/asm-generic/bitops/const_hweight.h:29:49: note: in expansion of macro '__const_hweight64' 29 | #define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) | ^~~~~~~~~~~~~~~~~ arch/x86/kvm/hyperv.c:1983:36: note: in expansion of macro 'hweight64' 1983 | if (hc->var_cnt != hweight64(valid_bank_mask)) | ^~~~~~~~~ CC: Borislav Petkov CC: Dave Hansen CC: H. Peter Anvin CC: Ingo Molnar CC: Jim Mattson CC: Joerg Roedel CC: Paolo Bonzini CC: Sean Christopherson CC: Thomas Gleixner CC: Vitaly Kuznetsov CC: Wanpeng Li CC: kvm@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: x86@kernel.org Reported-by: kernel test robot Signed-off-by: Yury Norov Message-Id: <20220519171504.1238724-1-yury.norov@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 46f9dfb604694..a0702b6be3e89 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1914,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); - unsigned long valid_bank_mask; + u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; bool all_cpus; @@ -1956,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; - if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64)) + if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64)) return HV_STATUS_INVALID_HYPERCALL_INPUT; if (all_cpus) From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 May 2022 13:48:11 -0400 Subject: [PATCH 478/491] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID With shadow paging enabled, the INVPCID instruction results in a call to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the invlpg callback is not set and the result is a NULL pointer dereference. Fix it trivially by checking for mmu->invlpg before every call. There are other possibilities: - check for CR0.PG, because KVM (like all Intel processors after P5) flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a nop with paging disabled - check for EFER.LMA, because KVM syncs and flushes when switching MMU contexts outside of 64-bit mode All of these are tricky, go for the simple solution. This is CVE-2022-1789. Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 56ebc4fb7f917..45e1573f8f1d3 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) uint i; if (pcid == kvm_get_active_pcid(vcpu)) { - mmu->invlpg(vcpu, gva, mmu->root.hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->root.hpa); tlb_flush = true; } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } } From 3ac6487e584a1eb54071dbe1212e05b884136704 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 May 2022 20:38:06 +0200 Subject: [PATCH 479/491] perf: Fix sys_perf_event_open() race against self Norbert reported that it's possible to race sys_perf_event_open() such that the looser ends up in another context from the group leader, triggering many WARNs. The move_group case checks for races against itself, but the !move_group case doesn't, seemingly relying on the previous group_leader->ctx == ctx check. However, that check is racy due to not holding any locks at that time. Therefore, re-check the result after acquiring locks and bailing if they no longer match. Additionally, clarify the not_move_group case from the move_group-vs-move_group race. Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") Reported-by: Norbert Slusarek Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- kernel/events/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7858bafffa9d6..7f1e4c5897e75 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12217,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different task * or CPU context. If we're moving SW events, we'll fix * this up later, so allow that. + * + * Racy, not holding group_leader->ctx->mutex, see comment with + * perf_event_ctx_lock(). */ if (!move_group && group_leader->ctx != ctx) goto err_context; @@ -12282,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open, } else { perf_event_ctx_unlock(group_leader, gctx); move_group = 0; + goto not_move_group; } } @@ -12298,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open, } } else { mutex_lock(&ctx->mutex); + + /* + * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, + * see the group_leader && !move_group test earlier. + */ + if (group_leader && group_leader->ctx != ctx) { + err = -EINVAL; + goto err_locked; + } } +not_move_group: if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; From a2537c98a8a3b57002e54a262d180b9490bc7190 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 14 May 2022 10:31:47 +0800 Subject: [PATCH 480/491] i2c: mt7621: fix missing clk_disable_unprepare() on error in mtk_i2c_probe() Fix the missing clk_disable_unprepare() before return from mtk_i2c_probe() in the error handling case. Fixes: d04913ec5f89 ("i2c: mt7621: Add MediaTek MT7621/7628/7688 I2C driver") Signed-off-by: Yang Yingliang Reviewed-by: Stefan Roese Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt7621.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 45fe4a7fe0c03..901f0fb04fee4 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) if (i2c->bus_freq == 0) { dev_warn(i2c->dev, "clock-frequency 0 not supported\n"); - return -EINVAL; + ret = -EINVAL; + goto err_disable_clk; } adap = &i2c->adap; @@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret < 0) - return ret; + goto err_disable_clk; dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000); + return 0; + +err_disable_clk: + clk_disable_unprepare(i2c->clk); + return ret; } From 17a0f3acdc6ec8b89ad40f6e22165a4beee25663 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 27 Apr 2022 13:19:10 +0300 Subject: [PATCH 481/491] i2c: ismt: Provide a DMA buffer for Interrupt Cause Logging Before sending a MSI the hardware writes information pertinent to the interrupt cause to a memory location pointed by SMTICL register. This memory holds three double words where the least significant bit tells whether the interrupt cause of master/target/error is valid. The driver does not use this but we need to set it up because otherwise it will perform DMA write to the default address (0) and this will cause an IOMMU fault such as below: DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:12.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set To prevent this from happening, provide a proper DMA buffer for this that then gets mapped by the IOMMU accordingly. Signed-off-by: Mika Westerberg Reviewed-by: From: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index c0364314877ec..c16157ee8c520 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -82,6 +82,7 @@ #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ +#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */ /* Hardware Descriptor Constants - Control Field */ #define ISMT_DESC_CWRL 0x01 /* Command/Write Length */ @@ -175,6 +176,8 @@ struct ismt_priv { u8 head; /* ring buffer head pointer */ struct completion cmp; /* interrupt completion */ u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ + dma_addr_t log_dma; + u32 *log; }; static const struct pci_device_id ismt_ids[] = { @@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, memset(desc, 0, sizeof(struct ismt_desc)); desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write); + /* Always clear the log entries */ + memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32)); + /* Initialize common control bits */ if (likely(pci_dev_msi_enabled(priv->pci_dev))) desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR; @@ -708,6 +714,8 @@ static void ismt_hw_init(struct ismt_priv *priv) /* initialize the Master Descriptor Base Address (MDBA) */ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA); + writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL); + /* initialize the Master Control Register (MCTRL) */ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL); @@ -795,6 +803,12 @@ static int ismt_dev_init(struct ismt_priv *priv) priv->head = 0; init_completion(&priv->cmp); + priv->log = dmam_alloc_coherent(&priv->pci_dev->dev, + ISMT_LOG_ENTRIES * sizeof(u32), + &priv->log_dma, GFP_KERNEL); + if (!priv->log) + return -ENOMEM; + return 0; } From 03a35bc856ddc09f2cc1f4701adecfbf3b464cb3 Mon Sep 17 00:00:00 2001 From: Piyush Malgujar Date: Wed, 11 May 2022 06:36:59 -0700 Subject: [PATCH 482/491] drivers: i2c: thunderx: Allow driver to work with ACPI defined TWSI controllers Due to i2c->adap.dev.fwnode not being set, ACPI_COMPANION() wasn't properly found for TWSI controllers. Signed-off-by: Szymon Balcerak Signed-off-by: Piyush Malgujar Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-thunderx-pcidrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c index 12c90aa0900e6..a77cd86fe75ed 100644 --- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c +++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c @@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev, i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info; i2c->adap.dev.parent = dev; i2c->adap.dev.of_node = pdev->dev.of_node; + i2c->adap.dev.fwnode = dev->fwnode; snprintf(i2c->adap.name, sizeof(i2c->adap.name), "Cavium ThunderX i2c adapter at %s", dev_name(dev)); i2c_set_adapdata(&i2c->adap, i2c); From 451ed8058c69a3fee29fa9e2967a4e22a221fe75 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 20 May 2022 15:42:36 +0530 Subject: [PATCH 483/491] perf test: Fix "all PMU test" to skip hv_24x7/hv_gpci tests on powerpc "perf all PMU test" picks the input events from "perf list --raw-dump pmu" list and runs "perf stat -e" for each of the event in the list. In case of powerpc, the PowerVM environment supports events from hv_24x7 and hv_gpci PMU which is of example format like below: - hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ - hv_gpci/event,partition_id=?/ The value for "?" needs to be filled in depending on system and respective event. CPM_ADJUNCT_INST needs have core value and domain value. hv_gpci event needs partition_id. Similarly, there are other events for hv_24x7 and hv_gpci having "?" in event format. Hence skip these events on powerpc platform since values like partition_id, domain is specific to system and event. Fixes: 3d5ac9effcc640d5 ("perf test: Workload test of all PMUs") Signed-off-by: Athira Jajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Link: https://lore.kernel.org/r/20220520101236.17249-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_all_pmu.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index b30dba455f36c..9c9ef33e0b3c6 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -5,6 +5,16 @@ set -e for p in $(perf list --raw-dump pmu); do + # In powerpc, skip the events for hv_24x7 and hv_gpci. + # These events needs input values to be filled in for + # core, chip, partition id based on system. + # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ + # hv_gpci/event,partition_id=?/ + # Hence skip these events for ppc. + if echo "$p" |grep -Eq 'hv_24x7|hv_gpci' ; then + echo "Skipping: Event '$p' in powerpc" + continue + fi echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "" ; then From 01b28e4a58152e8906eeb5f1b55a0c404c48c7c8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 18 May 2022 07:51:25 -0700 Subject: [PATCH 484/491] perf regs x86: Fix arch__intr_reg_mask() for the hybrid platform The X86 specific arch__intr_reg_mask() is to check whether the kernel and hardware can collect XMM registers. But it doesn't work on some hybrid platform. Without the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 The config of the test event doesn't contain the PMU information. The kernel may fail to initialize it on the correct hybrid PMU and return the wrong non-supported information. Add the PMU information into the config for the hybrid platform. The same register set is supported among different hybrid PMUs. Checking the first available one is good enough. With the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 Fixes: 6466ec14aaf44ff1 ("perf regs x86: Add X86 specific arch__intr_reg_mask()") Reported-by: Ammy Yi Signed-off-by: Kan Liang Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220518145125.1494156-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 207c56805c551..0ed177991ad05 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -9,6 +9,8 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" #include "../../../util/event.h" +#include "../../../util/pmu.h" +#include "../../../util/pmu-hybrid.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void) .disabled = 1, .exclude_kernel = 1, }; + struct perf_pmu *pmu; int fd; /* * In an unnamed union, init it here to build on older gcc versions */ attr.sample_period = 1; + if (perf_pmu__has_hybrid()) { + /* + * The same register set is supported among different hybrid PMUs. + * Only check the first available one. + */ + pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list); + attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } + event_attr_init(&attr); fd = sys_perf_event_open(&attr, 0, -1, -1, 0); From caaaa55477e23cec9761f7c981b144dd5ecc0bf3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 17 May 2022 13:41:44 -0700 Subject: [PATCH 485/491] perf test: Avoid shell test description infinite loop for_each_shell_test() is already strict in expecting tests to be files and executable. It is sometimes possible when it iterates over all files that it finds one that is executable and lacks a newline character. When this happens the loop never terminates as it doesn't check for EOF. Add the EOF check to make this loop at least bounded by the file size. If the description is returned as NULL then also skip the test. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Marco Elver Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sohaib Mohamed Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220517204144.645913-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index fac3717d9ba1b..d336cda94a115 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size, { FILE *fp; char filename[PATH_MAX]; + int ch; path__join(filename, sizeof(filename), path, name); fp = fopen(filename, "r"); @@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size, return NULL; /* Skip shebang */ - while (fgetc(fp) != '\n'); + do { + ch = fgetc(fp); + } while (ch != EOF && ch != '\n'); description = fgets(description, size, fp); fclose(fp); @@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, .priv = &st, }; - if (!perf_test__matches(test_suite.desc, curr, argc, argv)) + if (test_suite.desc == NULL || + !perf_test__matches(test_suite.desc, curr, argc, argv)) continue; st.file = ent->d_name; From f8ac1c478424a9a14669b8cef7389b1e14e5229d Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 20 May 2022 10:11:58 +0200 Subject: [PATCH 486/491] perf bench numa: Address compiler error on s390 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compilation on s390 results in this error: # make DEBUG=y bench/numa.o ... bench/numa.c: In function ‘__bench_numa’: bench/numa.c:1749:81: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size between 10 and 20 [-Werror=format-truncation=] 1749 | snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); ^~ ... bench/numa.c:1749:64: note: directive argument in the range [-2147483647, 2147483646] ... # The maximum length of the %d replacement is 11 characters because of the negative sign. Therefore extend the array by two more characters. Output after: # make DEBUG=y bench/numa.o > /dev/null 2>&1; ll bench/numa.o -rw-r--r-- 1 root root 418320 May 19 09:11 bench/numa.o # Fixes: 3aff8ba0a4c9c919 ("perf bench numa: Avoid possible truncation when using snprintf()") Suggested-by: Namhyung Kim Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20220520081158.2990006-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index d5289fa58a4f1..20eed1e53f809 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1740,7 +1740,7 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { From cfd7092c31aed7288725cb922bc10d9d52eac302 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 11 May 2022 17:19:59 +0530 Subject: [PATCH 487/491] perf test session topology: Fix test to skip the test in guest environment The session topology test fails in powerpc pSeries platform. Test logs: <<>> Session topology : FAILED! <<>> This testcases tests cpu topology by checking the core_id and socket_id stored in perf_env from perf session. The data from perf session is compared with the cpu topology information from "/sys/devices/system/cpu/cpuX/topology" like core_id, physical_package_id. In case of virtual environment, detail like physical_package_id is restricted to be exposed. Hence physical_package_id is set to -1. The testcase fails on such platforms since socket_id can't be fetched from topology info. Skip the testcase in powerpc if physical_package_id returns -1. Reviewed-by: Kajol Jain Signed-off-by: Athira Rajeev --- Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220511114959.84002-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee1e3dcbc0bdb..d23a9e322ff52 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) && strncmp(session->header.env.arch, "aarch64", 7)) return TEST_SKIP; + /* + * In powerpc pSeries platform, not all the topology information + * are exposed via sysfs. Due to restriction, detail like + * physical_package_id will be set to -1. Hence skip this + * test if physical_package_id returns -1 for cpu from perf_cpu_map. + */ + if (strncmp(session->header.env.arch, "powerpc", 7)) { + if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1) + return TEST_SKIP; + } + TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { From 8994e97be3eb3c3a7b59d6223018ffab8c272e2d Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 11 May 2022 17:24:38 +0530 Subject: [PATCH 488/491] perf test bpf: Skip test if clang is not present Perf BPF filter test fails in environment where "clang" is not installed. Test failure logs: <<>> 42: BPF filter : 42.1: Basic BPF filtering : Skip 42.2: BPF pinning : FAILED! 42.3: BPF prologue generation : FAILED! <<>> Enabling verbose option provided debug logs which says clang/llvm needs to be installed. Snippet of verbose logs: <<>> 42.2: BPF pinning : --- start --- test child forked, pid 61423 ERROR: unable to find clang. Hint: Try to install latest clang/llvm to support BPF. Check your $PATH <> Failed to compile test case: 'Basic BPF llvm compile' Unable to get BPF object, fix kbuild first test child finished with -1 ---- end ---- BPF filter subtest 2: FAILED! <<>> Here subtests, "BPF pinning" and "BPF prologue generation" failed and logs shows clang/llvm is needed. After installing clang, testcase passes. Reason on why subtest failure happens though logs has proper debug information: Main function __test__bpf calls test_llvm__fetch_bpf_obj by passing 4th argument as true ( 4th arguments maps to parameter "force" in test_llvm__fetch_bpf_obj ). But this will cause test_llvm__fetch_bpf_obj to skip the check for clang/llvm. Snippet of code part which checks for clang based on parameter "force" in test_llvm__fetch_bpf_obj: <<>> if (!force && (!llvm_param.user_set_param && <<>> Since force is set to "false", test won't get skipped and fails to compile test case. The BPF code compilation needs clang, So pass the fourth argument as "false" and also skip the test if reason for return is "TEST_SKIP" After the patch: <<>> 42: BPF filter : 42.1: Basic BPF filtering : Skip 42.2: BPF pinning : Skip 42.3: BPF prologue generation : Skip <<>> Fixes: ba1fae431e74bb42 ("perf test: Add 'perf test BPF'") Reviewed-by: Kajol Jain Signed-off-by: Athira Jajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Wang Nan Link: https://lore.kernel.org/r/20220511115438.84032-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 57b9591f7cbb4..17c023823713d 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -222,11 +222,11 @@ static int __test__bpf(int idx) ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, bpf_testcase_table[idx].prog_id, - true, NULL); + false, NULL); if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { pr_debug("Unable to get BPF object, %s\n", bpf_testcase_table[idx].msg_compile_fail); - if (idx == 0) + if ((idx == 0) || (ret == TEST_SKIP)) return TEST_SKIP; else return TEST_FAIL; @@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, static struct test_case bpf_tests[] = { #ifdef HAVE_LIBBPF_SUPPORT TEST_CASE("Basic BPF filtering", basic_bpf_test), - TEST_CASE("BPF pinning", bpf_pinning), + TEST_CASE_REASON("BPF pinning", bpf_pinning, + "clang isn't installed or environment missing BPF support"), #ifdef HAVE_BPF_PROLOGUE - TEST_CASE("BPF prologue generation", bpf_prologue_test), + TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, + "clang isn't installed or environment missing BPF support"), #else TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), #endif From 51d0bf99b8342be82369aa63eff343bf5df586dd Mon Sep 17 00:00:00 2001 From: Chengdong Li Date: Tue, 17 May 2022 09:57:26 +0800 Subject: [PATCH 489/491] perf session: Fix Intel LBR callstack entries and nr print message When generating callstack information from branch_stack(Intel LBR), the actual number of callstack entry should be bigger than the number of branch_stack, for example: branch_stack records: B() -> C() A() -> B() converted callstack records should be: C() B() A() though, the number of callstack equals to the number of branch stack plus 1. This patch fixes above issue in branch_stack__printf(). For example, # echo 'scale=2000; 4*a(1)' > cmd # perf record --call-graph lbr bc -l < cmd Before applying this patch, `perf script -D` output: 1220022677386876 0x2a40 [0xd8]: PERF_RECORD_SAMPLE(IP, 0x4002): 17990/17990: 0x40a6d6 period: 894172 addr: 0 ... LBR call chain: nr:8 ..... 0: fffffffffffffe00 ..... 1: 000000000040a410 ..... 2: 000000000040573c ..... 3: 0000000000408650 ..... 4: 00000000004022f2 ..... 5: 00000000004015f5 ..... 6: 00007f5ed6dcb553 ..... 7: 0000000000401698 ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 000000000040a6d8 ... branch callstack: nr:6 # which is not consistent with LBR records. ..... 0: 000000000040a410 ..... 1: 0000000000408650 # ditto ..... 2: 00000000004022f2 ..... 3: 00000000004015f5 ..... 4: 00007f5ed6dcb553 ..... 5: 0000000000401698 ... thread: bc:17990 ...... dso: /usr/bin/bc bc 17990 1220022.677386: 894172 cycles: 40a410 [unknown] (/usr/bin/bc) 40573c [unknown] (/usr/bin/bc) 408650 [unknown] (/usr/bin/bc) 4022f2 [unknown] (/usr/bin/bc) 4015f5 [unknown] (/usr/bin/bc) 7f5ed6dcb553 __libc_start_main+0xf3 (/usr/lib64/libc-2.17.so) 401698 [unknown] (/usr/bin/bc) After applied: 1220022677386876 0x2a40 [0xd8]: PERF_RECORD_SAMPLE(IP, 0x4002): 17990/17990: 0x40a6d6 period: 894172 addr: 0 ... LBR call chain: nr:8 ..... 0: fffffffffffffe00 ..... 1: 000000000040a410 ..... 2: 000000000040573c ..... 3: 0000000000408650 ..... 4: 00000000004022f2 ..... 5: 00000000004015f5 ..... 6: 00007f5ed6dcb553 ..... 7: 0000000000401698 ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 000000000040a6d8 ... branch callstack: nr:7 ..... 0: 000000000040a410 ..... 1: 000000000040573c ..... 2: 0000000000408650 ..... 3: 00000000004022f2 ..... 4: 00000000004015f5 ..... 5: 00007f5ed6dcb553 ..... 6: 0000000000401698 ... thread: bc:17990 ...... dso: /usr/bin/bc bc 17990 1220022.677386: 894172 cycles: 40a410 [unknown] (/usr/bin/bc) 40573c [unknown] (/usr/bin/bc) 408650 [unknown] (/usr/bin/bc) 4022f2 [unknown] (/usr/bin/bc) 4015f5 [unknown] (/usr/bin/bc) 7f5ed6dcb553 __libc_start_main+0xf3 (/usr/lib64/libc-2.17.so) 401698 [unknown] (/usr/bin/bc) Change from v1: - refined code style according to Jiri's review comments. Signed-off-by: Chengdong Li Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: likexu@tencent.com Link: https://lore.kernel.org/r/20220517015726.96131-1-chengdongli@tencent.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f9a320694b855..a7f93f5a1ac81 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; - printf("%s: nr:%" PRIu64 "\n", - !callstack ? "... branch stack" : "... branch callstack", - sample->branch_stack->nr); + if (!callstack) { + printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr); + } else { + /* the reason of adding 1 to nr is because after expanding + * branch stack it generates nr + 1 callstack records. e.g., + * B()->C() + * A()->B() + * the final callstack should be: + * C() + * B() + * A() + */ + printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1); + } for (i = 0; i < sample->branch_stack->nr; i++) { struct branch_entry *e = &entries[i]; @@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) (unsigned)e->flags.reserved, e->flags.type ? branch_type_name(e->flags.type) : ""); } else { - printf("..... %2"PRIu64": %016" PRIx64 "\n", - i, i > 0 ? e->from : e->to); + if (i == 0) { + printf("..... %2"PRIu64": %016" PRIx64 "\n" + "..... %2"PRIu64": %016" PRIx64 "\n", + i, e->to, i+1, e->from); + } else { + printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from); + } } } } From 2aeb8c86d49967552394d5e723f87454cb53f501 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:18:28 +0100 Subject: [PATCH 490/491] afs: Fix afs_getattr() to refetch file status if callback break occurred If a callback break occurs (change notification), afs_getattr() needs to issue an FS.FetchStatus RPC operation to update the status of the file being examined by the stat-family of system calls. Fix afs_getattr() to do this if AFS_VNODE_CB_PROMISED has been cleared on a vnode by a callback break. Skip this if AT_STATX_DONT_SYNC is set. This can be tested by appending to a file on one AFS client and then using "stat -L" to examine its length on a machine running kafs. This can also be watched through tracing on the kafs machine. The callback break is seen: kworker/1:1-46 [001] ..... 978.910812: afs_cb_call: c=0000005f YFSCB.CallBack kworker/1:1-46 [001] ...1. 978.910829: afs_cb_break: 100058:23b4c:242d2c2 b=2 s=1 break-cb kworker/1:1-46 [001] ..... 978.911062: afs_call_done: c=0000005f ret=0 ab=0 [0000000082994ead] And then the stat command generated no traffic if unpatched, but with this change a call to fetch the status can be observed: stat-4471 [000] ..... 986.744122: afs_make_fs_call: c=000000ab 100058:023b4c:242d2c2 YFS.FetchStatus stat-4471 [000] ..... 986.745578: afs_call_done: c=000000ab ret=0 ab=0 [0000000087fc8c84] Fixes: 08e0e7c82eea ("[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.") Reported-by: Markus Suvanto Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Tested-by: Markus Suvanto Tested-by: kafs-testing+fedora34_64checkkafs-build-496@auristor.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=216010 Link: https://lore.kernel.org/r/165308359800.162686.14122417881564420962.stgit@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/afs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 2fe402483ad5b..30b066299d39f 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -740,10 +740,22 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (!(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(&init_user_ns, inode, stat); From 4b0986a3613c92f4ec1bdc7f60ec66fea135991f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 May 2022 09:52:31 -1000 Subject: [PATCH 491/491] Linux 5.18 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5033c0577c6da..7d5b0bfe79602 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Superb Owl # *DOCUMENTATION*