From 711e26c00e4c7b7cef0420c76a61e6d818e12687 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Sat, 9 Oct 2021 08:59:00 +0000 Subject: [PATCH 001/361] firmware: tegra: Fix error application of sizeof() to pointer Application of sizeof() to pointer yields the number of bytes of the pointer, but it should use the length of buffer in the code. Fixes: 06c2d9a078ab ("firmware: tegra: Reduce stack usage") Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/firmware/tegra/bpmp-debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index 6d66fe03fb6af..fd89899aeeed9 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -77,13 +77,14 @@ static const char *get_filename(struct tegra_bpmp *bpmp, const char *root_path, *filename = NULL; char *root_path_buf; size_t root_len; + size_t root_path_buf_len = 512; - root_path_buf = kzalloc(512, GFP_KERNEL); + root_path_buf = kzalloc(root_path_buf_len, GFP_KERNEL); if (!root_path_buf) goto out; root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf, - sizeof(root_path_buf)); + root_path_buf_len); if (IS_ERR(root_path)) goto out; From 423e85e97aaf69e5198bbec6811e3825c8b5019a Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Tue, 16 Nov 2021 10:46:16 +0200 Subject: [PATCH 002/361] ARM: rockchip: Use memcpy_toio instead of memcpy on smp bring-up This fixes a potential kernel panic on memcpy when FORTIFY_SOURCE is enabled. Because memory is iomem use appropriate function for accessing it. Signed-off-by: Ivan T. Ivanov Link: https://lore.kernel.org/r/20211116084616.24811-1-iivanov@suse.de Signed-off-by: Heiko Stuebner --- arch/arm/mach-rockchip/platsmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index d60856898d97a..5ec58d004b7de 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -189,7 +189,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) rockchip_boot_fn = __pa_symbol(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ - memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); + memcpy_toio(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); flush_cache_all(); outer_clean_range(0, trampoline_sz); From 6dd0053683804427529ef3523f7872f473440a19 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Mon, 15 Nov 2021 16:33:21 +0800 Subject: [PATCH 003/361] arm64: dts: rockchip: remove mmc-hs400-enhanced-strobe from rk3399-khadas-edge Remove mmc-hs400-enhanced-strobe from the rk3399-khadas-edge dts to improve compatibility with a wider range of eMMC chips. Before (BJTD4R 29.1 GiB): [ 7.001493] mmc2: CQHCI version 5.10 [ 7.027971] mmc2: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA ....... [ 7.207086] mmc2: mmc_select_hs400es failed, error -110 [ 7.207129] mmc2: error -110 whilst initialising MMC card [ 7.308893] mmc2: mmc_select_hs400es failed, error -110 [ 7.308921] mmc2: error -110 whilst initialising MMC card [ 7.427524] mmc2: mmc_select_hs400es failed, error -110 [ 7.427546] mmc2: error -110 whilst initialising MMC card [ 7.590993] mmc2: mmc_select_hs400es failed, error -110 [ 7.591012] mmc2: error -110 whilst initialising MMC card After: [ 6.960785] mmc2: CQHCI version 5.10 [ 6.984672] mmc2: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA [ 7.175021] mmc2: Command Queue Engine enabled [ 7.175053] mmc2: new HS400 MMC card at address 0001 [ 7.175808] mmcblk2: mmc2:0001 BJTD4R 29.1 GiB [ 7.176033] mmcblk2boot0: mmc2:0001 BJTD4R 4.00 MiB [ 7.176245] mmcblk2boot1: mmc2:0001 BJTD4R 4.00 MiB [ 7.176495] mmcblk2rpmb: mmc2:0001 BJTD4R 4.00 MiB, chardev (242:0) Fixes: c2aacceedc86 ("arm64: dts: rockchip: Add support for Khadas Edge/Edge-V/Captain boards") Signed-off-by: Artem Lapkin Link: https://lore.kernel.org/r/20211115083321.2627461-1-art@khadas.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index d5c7648c841dc..f1fcc6b5b402c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -705,7 +705,6 @@ &sdhci { bus-width = <8>; mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; non-removable; status = "okay"; }; From 772fb46109f635dd75db20c86b7eaf48efa46cef Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 2 Nov 2021 18:29:07 +0000 Subject: [PATCH 004/361] arm64: dts: rockchip: fix rk3308-roc-cc vcc-sd supply Correct a typo in the vin-supply property. The input supply is always-on, so this mistake doesn't affect whether the supply is actually enabled correctly. Fixes: 4403e1237be3 ("arm64: dts: rockchip: Add devicetree for board roc-rk3308-cc") Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20211102182908.3409670-2-john@metanate.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 665b2e69455dd..ea6820902ede0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -97,7 +97,7 @@ regulator-max-microvolt = <3300000>; regulator-always-on; regulator-boot-on; - vim-supply = <&vcc_io>; + vin-supply = <&vcc_io>; }; vdd_core: vdd-core { From 2b454a90e2ccdd6e03f88f930036da4df577be76 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 2 Nov 2021 18:29:08 +0000 Subject: [PATCH 005/361] arm64: dts: rockchip: fix rk3399-leez-p710 vcc3v3-lan supply Correct a typo in the vin-supply property. The input supply is always-on, so this mistake doesn't affect whether the supply is actually enabled correctly. Fixes: fc702ed49a86 ("arm64: dts: rockchip: Add dts for Leez RK3399 P710 SBC") Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20211102182908.3409670-3-john@metanate.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts index 7c93f840bc64f..e890166e7fd43 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts @@ -55,7 +55,7 @@ regulator-boot-on; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - vim-supply = <&vcc3v3_sys>; + vin-supply = <&vcc3v3_sys>; }; vcc3v3_sys: vcc3v3-sys { From 8240e87f16d17a9592c9d67857a3dcdbcb98f10d Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Wed, 27 Oct 2021 16:37:25 +0200 Subject: [PATCH 006/361] arm64: dts: rockchip: fix audio-supply for Rock Pi 4 As stated in the schematics [1] and [2] P5 the APIO5 domain is supplied by RK808-D Buck4, which in our case vcc1v8_codec - i.e. a 1.8 V regulator. Currently only white noise comes from the ES8316's output, which - for whatever reason - came up only after the the correct switch from i2s0_8ch_bus to i2s0_2ch_bus for i2s0's pinctrl was done. Fix this by setting the correct regulator for audio-supply. [1] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/rockpi4_v13_sch_20181112.pdf [2] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/rockpi_4c_v12_sch_20200620.pdf Fixes: 1b5715c602fd ("arm64: dts: rockchip: add ROCK Pi 4 DTS support") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20211027143726.165809-1-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 98136c88fa497..6a434be628193 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -502,7 +502,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; - audio-supply = <&vcc_3v0>; + audio-supply = <&vcc1v8_codec>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; From aef4b9a89a376a9cabe5e744729914e7766c59bb Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 20 Oct 2021 11:59:23 +0200 Subject: [PATCH 007/361] arm64: dts: rockchip: fix poweroff on helios64 Adding the rockchip,system-power-controller property here will use the rk808 to power off the system. Fixes: 09e006cfb43e ("arm64: dts: rockchip: Add basic support for Kobol's Helios64") Signed-off-by: Florian Klink Tested-by: Dennis Gilmore Link: https://lore.kernel.org/r/20211020095926.735938-2-flokli@flokli.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index 63c7681843daa..b6ac00f646137 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -276,6 +276,7 @@ clock-output-names = "xin32k", "rk808-clkout2"; pinctrl-names = "default"; pinctrl-0 = <&pmic_int_l>; + rockchip,system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; vcc3-supply = <&vcc5v0_sys>; From 885633075847f475f26a29249d772cc0da85d8cd Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Mon, 25 Oct 2021 12:16:56 -0600 Subject: [PATCH 008/361] dmaengine: dw-axi-dmac: Fix uninitialized variable in axi_chan_block_xfer_start() Coverity complains of an uninitialized variable: 5. uninit_use_in_call: Using uninitialized value config.dst_per when calling axi_chan_config_write. [show details] 6. uninit_use_in_call: Using uninitialized value config.hs_sel_src when calling axi_chan_config_write. [show details] CID 121164 (#1-3 of 3): Uninitialized scalar variable (UNINIT) 7. uninit_use_in_call: Using uninitialized value config.src_per when calling axi_chan_config_write. [show details] 418 axi_chan_config_write(chan, &config); Fix this by initializing the structure to 0 which should at least be benign in axi_chan_config_write(). Also fix what looks like a cut-n-paste error when initializing config.hs_sel_dst. Fixes: 824351668a413 ("dmaengine: dw-axi-dmac: support DMAX_NUM_CHANNELS > 8") Cc: Eugeniy Paltsev Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Tim Gardner Link: https://lore.kernel.org/r/20211025181656.31658-1-tim.gardner@canonical.com Signed-off-by: Vinod Koul --- drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index cd0d745eb0714..33baf1591a490 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -373,7 +373,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, struct axi_dma_desc *first) { u32 priority = chan->chip->dw->hdata->priority[chan->id]; - struct axi_dma_chan_config config; + struct axi_dma_chan_config config = {}; u32 irq_mask; u8 lms = 0; /* Select AXI0 master for LLI fetching */ @@ -391,7 +391,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, config.tt_fc = DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC; config.prior = priority; config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; - config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; + config.hs_sel_src = DWAXIDMAC_HS_SEL_HW; switch (chan->direction) { case DMA_MEM_TO_DEV: dw_axi_dma_set_byte_halfword(chan, true); From 1ffc6f359f7ab114ad0d2bbe6a85cbd848709ab2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 9 Nov 2021 22:09:56 +0100 Subject: [PATCH 009/361] dmaengine: dw-edma: Fix return value check for dma_set_mask_and_coherent() The commit in the Fixes: tag has changed the logic of the code and now it is likely that the probe will return an early success (0), even if not completely executed. This should lead to a crash or similar issue later on when the code accesses to some never allocated resources. Change the '!err' into a 'err' when checking if 'dma_set_mask_and_coherent()' has failed or not. While at it, simplify the code and remove the "can't success code" related to 32 DMA mask. As stated in [1], 'dma_set_mask_and_coherent(DMA_BIT_MASK(64))' can't fail if 'dev->dma_mask' is non-NULL. And if it is NULL, it would fail for the same reason when tried with DMA_BIT_MASK(32). [1]: https://lkml.org/lkml/2021/6/7/398 Fixes: ecb8c88bd31c ("dmaengine: dw-edma-pcie: switch from 'pci_' to 'dma_' API") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/935fbb40ae930c5fe87482a41dcb73abf2257973.1636492127.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-pcie.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c index 198f6cd8ac1be..cee7aa231d7b1 100644 --- a/drivers/dma/dw-edma/dw-edma-pcie.c +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -187,17 +187,9 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, /* DMA configuration */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { + if (err) { pci_err(pdev, "DMA mask 64 set failed\n"); return err; - } else { - pci_err(pdev, "DMA mask 64 set failed\n"); - - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - pci_err(pdev, "DMA mask 32 set failed\n"); - return err; - } } /* Data structure allocation */ From fa51b16d05583c7aebbc06330afb50276243d198 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 17 Nov 2021 10:03:51 -0700 Subject: [PATCH 010/361] dmaengine: idxd: fix calling wq quiesce inside spinlock Dan reports that smatch has found idxd_wq_quiesce() is being called inside the idxd->dev_lock. idxd_wq_quiesce() calls wait_for_completion() and therefore it can sleep. Move the call outside of the spinlock as it does not need device lock. Fixes: 5b0c68c473a1 ("dmaengine: idxd: support reporting of halt interrupt") Reported-by: Dan Carpenter Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163716858508.1721911.15051495873516709923.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b630..cf2c8bc4f147a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -137,10 +137,10 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock(&idxd->dev_lock); idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", From 2d5446da5acecf9c67db1c9d55ae2c3e5de01f8d Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 10 Nov 2021 15:19:00 +0800 Subject: [PATCH 011/361] pinctrl: mediatek: fix global-out-of-bounds issue When eint virtual eint number is greater than gpio number, it maybe produce 'desc[eint_n]' size globle-out-of-bounds issue. Signed-off-by: Guodong Liu Signed-off-by: Zhiyong Tao Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20211110071900.4490-2-zhiyong.tao@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 91553b2fc1605..53779822348da 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -285,8 +285,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n, desc = (const struct mtk_pin_desc *)hw->soc->pins; *gpio_chip = &hw->chip; - /* Be greedy to guess first gpio_n is equal to eint_n */ - if (desc[eint_n].eint.eint_n == eint_n) + /* + * Be greedy to guess first gpio_n is equal to eint_n. + * Only eint virtual eint number is greater than gpio number. + */ + if (hw->soc->npins > eint_n && + desc[eint_n].eint.eint_n == eint_n) *gpio_n = eint_n; else *gpio_n = mtk_xt_find_eint_num(hw, eint_n); From 6331b8765cd0634a4e4cdcc1a6f1a74196616b94 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:44 +0800 Subject: [PATCH 012/361] riscv: dts: unleashed: Add gpio card detect to mmc-spi-slot Per HiFive Unleashed schematics, the card detect signal of the micro SD card is connected to gpio pin #11, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/c52a8e32-05ce-4aaf-95c8-7bf8453f8698_hifive-unleashed-a00-schematics-1.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ba304d4c455c2..ced0d4e479385 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -76,6 +76,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; }; }; From 298d03c2d7f1b5daacb6d4f4053fd3d677d67087 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:45 +0800 Subject: [PATCH 013/361] riscv: dts: unmatched: Add gpio card detect to mmc-spi-slot Per HiFive Unmatched schematics, the card detect signal of the micro SD card is connected to gpio pin #15, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/6a06d6c0-6e66-49b5-8e9e-e68ce76f4192_hifive-unmatched-schematics-v3.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 4f66919215f6e..3c796d64cf51f 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -2,6 +2,7 @@ /* Copyright (c) 2020 SiFive, Inc */ #include "fu740-c000.dtsi" +#include #include /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ @@ -223,6 +224,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 15 GPIO_ACTIVE_LOW>; }; }; From 1b8d2789dad0005fd5e7d35dab26a8e1203fb6da Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 24 Nov 2021 12:07:39 -0500 Subject: [PATCH 014/361] dm btree remove: fix use after free in rebalance_children() Move dm_tm_unlock() after dm_tm_dec(). Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-remove.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 70532335c7c7e..cb670f16e98e9 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -423,9 +423,9 @@ static int rebalance_children(struct shadow_spine *s, memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); - dm_tm_unlock(info->tm, child); dm_tm_dec(info->tm, dm_block_location(child)); + dm_tm_unlock(info->tm, child); return 0; } From 8979ead988d20887e563b77b16792594f76aa07a Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:38 +0100 Subject: [PATCH 015/361] arm64: dts: apple: change ethernet0 device type to ethernet Fixes make dtbs_check errors for t8103-j274.dts due to missing pci properties. Fixes: e1bebf978151 ("arm64: dts: apple: j274: Expose PCI node for the Ethernet MAC address") Reviewed-by: Mark Kettenis Signed-off-by: Janne Grunau Tested-by: Hector Martin Signed-off-by: Hector Martin --- arch/arm64/boot/dts/apple/t8103-j274.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/apple/t8103-j274.dts b/arch/arm64/boot/dts/apple/t8103-j274.dts index 33a80f9501dca..02c36301e9850 100644 --- a/arch/arm64/boot/dts/apple/t8103-j274.dts +++ b/arch/arm64/boot/dts/apple/t8103-j274.dts @@ -60,7 +60,7 @@ &port02 { bus-range = <3 3>; - ethernet0: pci@0,0 { + ethernet0: ethernet@0,0 { reg = <0x30000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ local-mac-address = [00 10 18 00 00 00]; From 48c06708e63e71b4395e4159797366aa03be10ff Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 18 Nov 2021 12:58:24 +0100 Subject: [PATCH 016/361] mac80211: fix TCP performance on mesh interface sta is NULL for mesh point (resolved later), so sk pacing parameters were not applied. Signed-off-by: Maxime Bizon Link: https://lore.kernel.org/r/66f51659416ac35d6b11a313bd3ffe8b8a43dd55.camel@freebox.fr Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 278945e3e08ac..51ec5f9bc2e09 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4191,11 +4191,11 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_aggr_check(sdata, sta, skb); + sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); + if (sta) { struct ieee80211_fast_tx *fast_tx; - sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); - fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && From d5e568c3a4ec2ddd23e7dc5ad5b0c64e4f22981a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Nov 2021 12:47:40 +0100 Subject: [PATCH 017/361] mac80211: track only QoS data frames for admission control For admission control, obviously all of that only works for QoS data frames, otherwise we cannot even access the QoS field in the header. Syzbot reported (see below) an uninitialized value here due to a status of a non-QoS nullfunc packet, which isn't even long enough to contain the QoS header. Fix this to only do anything for QoS data packets. Reported-by: syzbot+614e82b88a1a4973e534@syzkaller.appspotmail.com Fixes: 02219b3abca5 ("mac80211: add WMM admission control support") Link: https://lore.kernel.org/r/20211122124737.dad29e65902a.Ieb04587afacb27c14e0de93ec1bfbefb238cc2a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 54ab0e1ef6ca5..37f7d975f3dac 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2452,11 +2452,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; From 18688c80ad8a8dd50523dc9276e929932cac86d4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Nov 2021 21:43:23 +0100 Subject: [PATCH 018/361] mac80211: fix rate control for retransmitted frames Since retransmission clears info->control, rate control needs to be called again, otherwise the driver might crash due to invalid rates. Cc: stable@vger.kernel.org # 5.14+ Reported-by: Aaro Koskinen Reported-by: Robert W Fixes: 03c3911d2d67 ("mac80211: call ieee80211_tx_h_rate_ctrl() when dequeue") Signed-off-by: Felix Fietkau Tested-by: Aaro Koskinen Link: https://lore.kernel.org/r/20211122204323.9787-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 51ec5f9bc2e09..86a54df3aabdd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1822,15 +1822,15 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_CONTINUE; + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; goto txh_done; } - if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) - CALL_TXH(ieee80211_tx_h_rate_ctrl); - CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); From 73111efacd3c6d9e644acca1d132566932be8af0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Nov 2021 10:40:24 +0100 Subject: [PATCH 019/361] mac80211: fix regression in SSN handling of addba tx Some drivers that do their own sequence number allocation (e.g. ath9k) rely on being able to modify params->ssn on starting tx ampdu sessions. This was broken by a change that modified it to use sta->tid_seq[tid] instead. Cc: stable@vger.kernel.org Fixes: 31d8bb4e07f8 ("mac80211: agg-tx: refactor sending addba") Reported-by: Eneas U de Queiroz Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211124094024.43222-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 ++-- net/mac80211/sta_info.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 430a585875388..c1558dd2d2443 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -480,8 +480,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, - sta->tid_seq[tid] >> 4, + tid_tx->dialog_token, tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); @@ -523,6 +522,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ba27967820084..e7443fc4669c8 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -199,6 +199,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; From 942bd1070c3a39d1302fc5db73d60c86e3033c81 Mon Sep 17 00:00:00 2001 From: Xing Song Date: Tue, 23 Nov 2021 11:31:23 +0800 Subject: [PATCH 020/361] mac80211: set up the fwd_skb->dev for mesh forwarding Mesh forwarding requires that the fwd_skb->dev is set up for TX handling, otherwise the following warning will be generated, so set it up for the pending frames. [ 72.835674 ] WARNING: CPU: 0 PID: 1193 at __skb_flow_dissect+0x284/0x1298 [ 72.842379 ] Modules linked in: ksmbd pppoe ppp_async l2tp_ppp ... [ 72.962020 ] CPU: 0 PID: 1193 Comm: kworker/u5:1 Tainted: P S 5.4.137 #0 [ 72.969938 ] Hardware name: MT7622_MT7531 RFB (DT) [ 72.974659 ] Workqueue: napi_workq napi_workfn [ 72.979025 ] pstate: 60000005 (nZCv daif -PAN -UAO) [ 72.983822 ] pc : __skb_flow_dissect+0x284/0x1298 [ 72.988444 ] lr : __skb_flow_dissect+0x54/0x1298 [ 72.992977 ] sp : ffffffc010c738c0 [ 72.996293 ] x29: ffffffc010c738c0 x28: 0000000000000000 [ 73.001615 ] x27: 000000000000ffc2 x26: ffffff800c2eb818 [ 73.006937 ] x25: ffffffc010a987c8 x24: 00000000000000ce [ 73.012259 ] x23: ffffffc010c73a28 x22: ffffffc010a99c60 [ 73.017581 ] x21: 000000000000ffc2 x20: ffffff80094da800 [ 73.022903 ] x19: 0000000000000000 x18: 0000000000000014 [ 73.028226 ] x17: 00000000084d16af x16: 00000000d1fc0bab [ 73.033548 ] x15: 00000000715f6034 x14: 000000009dbdd301 [ 73.038870 ] x13: 00000000ea4dcbc3 x12: 0000000000000040 [ 73.044192 ] x11: 000000000eb00ff0 x10: 0000000000000000 [ 73.049513 ] x9 : 000000000eb00073 x8 : 0000000000000088 [ 73.054834 ] x7 : 0000000000000000 x6 : 0000000000000001 [ 73.060155 ] x5 : 0000000000000000 x4 : 0000000000000000 [ 73.065476 ] x3 : ffffffc010a98000 x2 : 0000000000000000 [ 73.070797 ] x1 : 0000000000000000 x0 : 0000000000000000 [ 73.076120 ] Call trace: [ 73.078572 ] __skb_flow_dissect+0x284/0x1298 [ 73.082846 ] __skb_get_hash+0x7c/0x228 [ 73.086629 ] ieee80211_txq_may_transmit+0x7fc/0x17b8 [mac80211] [ 73.092564 ] ieee80211_tx_prepare_skb+0x20c/0x268 [mac80211] [ 73.098238 ] ieee80211_tx_pending+0x144/0x330 [mac80211] [ 73.103560 ] tasklet_action_common.isra.16+0xb4/0x158 [ 73.108618 ] tasklet_action+0x2c/0x38 [ 73.112286 ] __do_softirq+0x168/0x3b0 [ 73.115954 ] do_softirq.part.15+0x88/0x98 [ 73.119969 ] __local_bh_enable_ip+0xb0/0xb8 [ 73.124156 ] napi_workfn+0x58/0x90 [ 73.127565 ] process_one_work+0x20c/0x478 [ 73.131579 ] worker_thread+0x50/0x4f0 [ 73.135249 ] kthread+0x124/0x128 [ 73.138484 ] ret_from_fork+0x10/0x1c Signed-off-by: Xing Song Tested-By: Frank Wunderlich Link: https://lore.kernel.org/r/20211123033123.2684-1-xing.song@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9541a4c30aca7..0544563ede522 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2944,6 +2944,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb) goto out; + fwd_skb->dev = sdata->dev; fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); info = IEEE80211_SKB_CB(fwd_skb); From 8f9dcc29566626f683843ccac6113a12208315ca Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Sat, 2 Oct 2021 08:53:29 -0600 Subject: [PATCH 021/361] mac80211: fix a memory leak where sta_info is not freed The following is from a system that went OOM due to a memory leak: wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_add_sta) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 wlan0: Inserted STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_work) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 . . wlan0: expiring inactive not authorized STA 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 1 wlan0: Removed STA 74:83:c2:64:0b:87 wlan0: Destroyed STA 74:83:c2:64:0b:87 The ieee80211_ibss_finish_sta() is called twice on the same STA from 2 different locations. On the second attempt, the allocated STA is not destroyed creating a kernel memory leak. This is happening because sta_info_insert_finish() does not call sta_info_free() the second time when the STA already exists (returns -EEXIST). Note that the caller sta_info_insert_rcu() assumes STA is destroyed upon errors. Same fix is applied to -ENOMEM. Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20211002145329.3125293-1-anzaki@gmail.com [change the error path label to use the existing code] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 51b49f0d3ad48..840ad1a860fa2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -644,13 +644,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; - goto out_err; + goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; - goto out_err; + goto out_cleanup; } local->num_sta++; @@ -706,8 +706,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); + out_cleanup: cleanup_single_sta(sta); - out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); From 1dc2f2b81a6a9895da59f3915760f6c0c3074492 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 Nov 2021 18:33:16 -0800 Subject: [PATCH 022/361] hv: utils: add PTP_1588_CLOCK to Kconfig to fix build The hyperv utilities use PTP clock interfaces and should depend a a kconfig symbol such that they will be built as a loadable module or builtin so that linker errors do not happen. Prevents these build errors: ld: drivers/hv/hv_util.o: in function `hv_timesync_deinit': hv_util.c:(.text+0x37d): undefined reference to `ptp_clock_unregister' ld: drivers/hv/hv_util.o: in function `hv_timesync_init': hv_util.c:(.text+0x738): undefined reference to `ptp_clock_register' Fixes: 3716a49a81ba ("hv_utils: implement Hyper-V PTP source") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Arnd Bergmann Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Dexuan Cui Cc: linux-hyperv@vger.kernel.org Cc: Greg Kroah-Hartman Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20211126023316.25184-1-rdunlap@infradead.org Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index dd12af20e467e..0747a8f1fceec 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -19,6 +19,7 @@ config HYPERV_TIMER config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" depends on HYPERV && CONNECTOR && NLS + depends on PTP_1588_CLOCK_OPTIONAL help Select this option to enable the Hyper-V Utilities. From 3dc709e518b47386e6af937eaec37bb36539edfd Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 26 Nov 2021 12:11:53 +0800 Subject: [PATCH 023/361] powerpc/85xx: Fix oops when CONFIG_FSL_PMC=n When CONFIG_FSL_PMC is set to n, no value is assigned to cpu_up_prepare in the mpc85xx_pm_ops structure. As a result, oops is triggered in smp_85xx_start_cpu(). smp: Bringing up secondary CPUs ... kernel tried to execute user page (0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch (NULL pointer?) Faulting instruction address: 0x00000000 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [00000000] 0x0 LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568 Call Trace: [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable) [c1051de8] [c0011460] __cpu_up+0xc0/0x228 [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224 [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8 [c1051eb8] [c07e67bc] smp_init+0x30/0x78 [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8 [c1051f18] [c00032d8] kernel_init+0x14/0x124 [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c Fixes: c45361abb918 ("powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n") Reported-by: Martin Kennedy Signed-off-by: Xiaoming Ni Tested-by: Martin Kennedy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211126041153.16926-1-nixiaoming@huawei.com --- arch/powerpc/platforms/85xx/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 83f4a6389a282..d7081e9af65c7 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu) local_irq_save(flags); hard_irq_disable(); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(cpu); /* if cpu is not spinning, reset it */ @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr) booting_thread_hwid = cpu_thread_in_core(nr); primary = cpu_first_thread_sibling(nr); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(nr); /* From 9222ba68c3f4065f6364b99cc641b6b019ef2d42 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Nov 2021 23:21:41 -0800 Subject: [PATCH 024/361] Input: i8042 - add deferred probe support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've got a bug report about the non-working keyboard on ASUS ZenBook UX425UA. It seems that the PS/2 device isn't ready immediately at boot but takes some seconds to get ready. Until now, the only workaround is to defer the probe, but it's available only when the driver is a module. However, many distros, including openSUSE as in the original report, build the PS/2 input drivers into kernel, hence it won't work easily. This patch adds the support for the deferred probe for i8042 stuff as a workaround of the problem above. When the deferred probe mode is enabled and the device couldn't be probed, it'll be repeated with the standard deferred probe mechanism. The deferred probe mode is enabled either via the new option i8042.probe_defer or via the quirk table entry. As of this patch, the quirk table contains only ASUS ZenBook UX425UA. The deferred probe part is based on Fabio's initial work. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Takashi Iwai Tested-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211117063757.11380-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- .../admin-guide/kernel-parameters.txt | 2 + drivers/input/serio/i8042-x86ia64io.h | 14 +++++ drivers/input/serio/i8042.c | 54 ++++++++++++------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc463..ddfa50578a3e1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1627,6 +1627,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index aedd055410443..1acc7c8449294 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -995,6 +995,17 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { { } }; +static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { + { + /* ASUS ZenBook UX425UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1315,6 +1326,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_kbdreset_table)) i8042_kbdreset = true; + if (dmi_check_system(i8042_dmi_probe_defer_table)) + i8042_probe_defer = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -45,6 +45,10 @@ static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); +static bool i8042_probe_defer; +module_param_named(probe_defer, i8042_probe_defer, bool, 0); +MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); + enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, @@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) * LCS/Telegraphics. */ -static int __init i8042_check_mux(void) +static int i8042_check_mux(void) { unsigned char mux_version; @@ -740,10 +744,10 @@ static int __init i8042_check_mux(void) /* * The following is used to test AUX IRQ delivery. */ -static struct completion i8042_aux_irq_delivered __initdata; -static bool i8042_irq_being_tested __initdata; +static struct completion i8042_aux_irq_delivered; +static bool i8042_irq_being_tested; -static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) +static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned long flags; unsigned char str, data; @@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ -static int __init i8042_toggle_aux(bool on) +static int i8042_toggle_aux(bool on) { unsigned char param; int i; @@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on) * the presence of an AUX interface. */ -static int __init i8042_check_aux(void) +static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; @@ -1005,7 +1009,7 @@ static int i8042_controller_init(void) if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); - return -EIO; + return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); @@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev) i8042_controller_reset(false); } -static int __init i8042_create_kbd_port(void) +static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; @@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void) return 0; } -static int __init i8042_create_aux_port(int idx) +static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; @@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx) return 0; } -static void __init i8042_free_kbd_port(void) +static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } -static void __init i8042_free_aux_ports(void) +static void i8042_free_aux_ports(void) { int i; @@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void) } } -static void __init i8042_register_ports(void) +static void i8042_register_ports(void) { int i; @@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void) i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } -static int __init i8042_setup_aux(void) +static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; @@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void) return error; } -static int __init i8042_setup_kbd(void) +static int i8042_setup_kbd(void) { int error; @@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } -static int __init i8042_probe(struct platform_device *dev) +static int i8042_probe(struct platform_device *dev) { int error; @@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, + .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; @@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { - struct platform_device *pdev; int err; dbg_init(); @@ -1626,17 +1630,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; - pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); + err = platform_driver_register(&i8042_driver); + if (err) goto err_platform_exit; + + i8042_platform_device = platform_device_alloc("i8042", -1); + if (!i8042_platform_device) { + err = -ENOMEM; + goto err_unregister_driver; } + err = platform_device_add(i8042_platform_device); + if (err) + goto err_free_device; + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; +err_free_device: + platform_device_put(i8042_platform_device); +err_unregister_driver: + platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; From e1f5e848209a1b51ccae50721b27684c6f9d978f Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 28 Nov 2021 23:41:42 -0800 Subject: [PATCH 025/361] Input: iqs626a - prohibit inlining of channel parsing functions Some automated builds report a stack frame size in excess of 2 kB for iqs626_probe(); the culprit appears to be the call to iqs626_parse_prop(). To solve this problem, specify noinline_for_stack for all of the iqs626_parse_*() helper functions which are called inside a for loop within iqs626_parse_prop(). As a result, a build with '-Wframe-larger-than' as low as 512 is free of any such warnings. Reported-by: kernel test robot Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/20211129004104.453930-1-jeff@labundy.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs626a.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index d57e996732cf4..23b5dd9552dcc 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -456,9 +456,10 @@ struct iqs626_private { unsigned int suspend_mode; }; -static int iqs626_parse_events(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_events(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -604,9 +605,10 @@ static int iqs626_parse_events(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_ati_target(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_ati_target(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -885,9 +887,10 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_channel(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_channel(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; From 1d72d9f960ccf1052a0630a68c3d358791dbdaaa Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 29 Nov 2021 00:08:13 -0800 Subject: [PATCH 026/361] Input: elantech - fix stack out of bound access in elantech_change_report_id() The array param[] in elantech_change_report_id() must be at least 3 bytes, because elantech_read_reg_params() is calling ps2_command() with PSMOUSE_CMD_GETINFO, that is going to access 3 bytes from param[], but it's defined in the stack as an array of 2 bytes, therefore we have a potential stack out-of-bounds access here, also confirmed by KASAN: [ 6.512374] BUG: KASAN: stack-out-of-bounds in __ps2_command+0x372/0x7e0 [ 6.512397] Read of size 1 at addr ffff8881024d77c2 by task kworker/2:1/118 [ 6.512416] CPU: 2 PID: 118 Comm: kworker/2:1 Not tainted 5.13.0-22-generic #22+arighi20211110 [ 6.512428] Hardware name: LENOVO 20T8000QGE/20T8000QGE, BIOS R1AET32W (1.08 ) 08/14/2020 [ 6.512436] Workqueue: events_long serio_handle_event [ 6.512453] Call Trace: [ 6.512462] show_stack+0x52/0x58 [ 6.512474] dump_stack+0xa1/0xd3 [ 6.512487] print_address_description.constprop.0+0x1d/0x140 [ 6.512502] ? __ps2_command+0x372/0x7e0 [ 6.512516] __kasan_report.cold+0x7d/0x112 [ 6.512527] ? _raw_write_lock_irq+0x20/0xd0 [ 6.512539] ? __ps2_command+0x372/0x7e0 [ 6.512552] kasan_report+0x3c/0x50 [ 6.512564] __asan_load1+0x6a/0x70 [ 6.512575] __ps2_command+0x372/0x7e0 [ 6.512589] ? ps2_drain+0x240/0x240 [ 6.512601] ? dev_printk_emit+0xa2/0xd3 [ 6.512612] ? dev_vprintk_emit+0xc5/0xc5 [ 6.512621] ? __kasan_check_write+0x14/0x20 [ 6.512634] ? mutex_lock+0x8f/0xe0 [ 6.512643] ? __mutex_lock_slowpath+0x20/0x20 [ 6.512655] ps2_command+0x52/0x90 [ 6.512670] elantech_ps2_command+0x4f/0xc0 [psmouse] [ 6.512734] elantech_change_report_id+0x1e6/0x256 [psmouse] [ 6.512799] ? elantech_report_trackpoint.constprop.0.cold+0xd/0xd [psmouse] [ 6.512863] ? ps2_command+0x7f/0x90 [ 6.512877] elantech_query_info.cold+0x6bd/0x9ed [psmouse] [ 6.512943] ? elantech_setup_ps2+0x460/0x460 [psmouse] [ 6.513005] ? psmouse_reset+0x69/0xb0 [psmouse] [ 6.513064] ? psmouse_attr_set_helper+0x2a0/0x2a0 [psmouse] [ 6.513122] ? phys_pmd_init+0x30e/0x521 [ 6.513137] elantech_init+0x8a/0x200 [psmouse] [ 6.513200] ? elantech_init_ps2+0xf0/0xf0 [psmouse] [ 6.513249] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513296] ? synaptics_send_cmd+0x60/0x60 [psmouse] [ 6.513342] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513388] ? psmouse_try_protocol+0x11e/0x170 [psmouse] [ 6.513432] psmouse_extensions+0x65d/0x6e0 [psmouse] [ 6.513476] ? psmouse_try_protocol+0x170/0x170 [psmouse] [ 6.513519] ? mutex_unlock+0x22/0x40 [ 6.513526] ? ps2_command+0x7f/0x90 [ 6.513536] ? psmouse_probe+0xa3/0xf0 [psmouse] [ 6.513580] psmouse_switch_protocol+0x27d/0x2e0 [psmouse] [ 6.513624] psmouse_connect+0x272/0x530 [psmouse] [ 6.513669] serio_driver_probe+0x55/0x70 [ 6.513679] really_probe+0x190/0x720 [ 6.513689] driver_probe_device+0x160/0x1f0 [ 6.513697] device_driver_attach+0x119/0x130 [ 6.513705] ? device_driver_attach+0x130/0x130 [ 6.513713] __driver_attach+0xe7/0x1a0 [ 6.513720] ? device_driver_attach+0x130/0x130 [ 6.513728] bus_for_each_dev+0xfb/0x150 [ 6.513738] ? subsys_dev_iter_exit+0x10/0x10 [ 6.513748] ? _raw_write_unlock_bh+0x30/0x30 [ 6.513757] driver_attach+0x2d/0x40 [ 6.513764] serio_handle_event+0x199/0x3d0 [ 6.513775] process_one_work+0x471/0x740 [ 6.513785] worker_thread+0x2d2/0x790 [ 6.513794] ? process_one_work+0x740/0x740 [ 6.513802] kthread+0x1b4/0x1e0 [ 6.513809] ? set_kthread_struct+0x80/0x80 [ 6.513816] ret_from_fork+0x22/0x30 [ 6.513832] The buggy address belongs to the page: [ 6.513838] page:00000000bc35e189 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1024d7 [ 6.513847] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) [ 6.513860] raw: 0017ffffc0000000 dead000000000100 dead000000000122 0000000000000000 [ 6.513867] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 6.513872] page dumped because: kasan: bad access detected [ 6.513879] addr ffff8881024d77c2 is located in stack of task kworker/2:1/118 at offset 34 in frame: [ 6.513887] elantech_change_report_id+0x0/0x256 [psmouse] [ 6.513941] this frame has 1 object: [ 6.513947] [32, 34) 'param' [ 6.513956] Memory state around the buggy address: [ 6.513962] ffff8881024d7680: f2 f2 f2 f2 f2 00 00 f3 f3 00 00 00 00 00 00 00 [ 6.513969] ffff8881024d7700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513976] >ffff8881024d7780: 00 00 00 00 f1 f1 f1 f1 02 f3 f3 f3 00 00 00 00 [ 6.513982] ^ [ 6.513988] ffff8881024d7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513995] ffff8881024d7880: 00 f1 f1 f1 f1 03 f2 03 f2 03 f3 f3 f3 00 00 00 [ 6.514000] ================================================================== Define param[] in elantech_change_report_id() as an array of 3 bytes to prevent the out-of-bounds access in the stack. Fixes: e4c9062717fe ("Input: elantech - fix protocol errors for some trackpoints in SMBus mode") BugLink: https://bugs.launchpad.net/bugs/1945590 Signed-off-by: Andrea Righi Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20211116095559.24395-1-andrea.righi@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 956d9cd347964..ece97f8c6a3e3 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1588,7 +1588,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = { */ static int elantech_change_report_id(struct psmouse *psmouse) { - unsigned char param[2] = { 0x10, 0x03 }; + /* + * NOTE: the code is expecting to receive param[] as an array of 3 + * items (see __ps2_command()), even if in this case only 2 are + * actually needed. Make sure the array size is 3 to avoid potential + * stack out-of-bound accesses. + */ + unsigned char param[3] = { 0x10, 0x03 }; if (elantech_write_reg_params(psmouse, 0x7, param) || elantech_read_reg_params(psmouse, 0x7, param) || From af9d3a2984dc3501acd612c657127517a1beaf9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 09:19:49 +0100 Subject: [PATCH 027/361] mac80211: add docs for ssn in struct tid_ampdu_tx As pointed out by Stephen, add the missing docs. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211129091948.1327ec82beab.Iecc5975406a3028d35c65ff8d2dec31a693888d3@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e7443fc4669c8..379fd367197f9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -176,6 +176,7 @@ struct sta_info; * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU withing A-MDPU + * @ssn: starting sequence number of the session * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. From 1eda919126b420fee6b8d546f7f728fbbd4b8f11 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Sat, 27 Nov 2021 11:28:53 +0100 Subject: [PATCH 028/361] nl80211: reset regdom when reloading regdb Reload the regdom when the regulatory db is reloaded. Otherwise, the user had to change the regulatoy domain to a different one and then reset it to the correct one to have a new regulatory db take effect after a reload. Signed-off-by: Finn Behrens Link: https://lore.kernel.org/r/YaIIZfxHgqc/UTA7@gimli.kloenk.dev [edit commit message] Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 + net/wireless/reg.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67c..0cf9335431e07 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,6 +83,7 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; + bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index df87c7f3a0492..61f1bf1bc4a73 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -133,6 +133,7 @@ static u32 reg_is_indoor_portid; static void restore_regulatory_settings(bool reset_user, bool cached); static void print_regdomain(const struct ieee80211_regdomain *rd); +static void reg_process_hint(struct regulatory_request *reg_request); static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -1098,6 +1099,8 @@ int reg_reload_regdb(void) const struct firmware *fw; void *db; int err; + const struct ieee80211_regdomain *current_regdomain; + struct regulatory_request *request; err = request_firmware(&fw, "regulatory.db", ®_pdev->dev); if (err) @@ -1118,8 +1121,27 @@ int reg_reload_regdb(void) if (!IS_ERR_OR_NULL(regdb)) kfree(regdb); regdb = db; - rtnl_unlock(); + /* reset regulatory domain */ + current_regdomain = get_cfg80211_regdom(); + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->alpha2[0] = current_regdomain->alpha2[0]; + request->alpha2[1] = current_regdomain->alpha2[1]; + request->initiator = NL80211_USER_REG_HINT_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; + request->reload = true; + + reg_process_hint(request); + +out_unlock: + rtnl_unlock(); out: release_firmware(fw); return err; @@ -2690,7 +2712,8 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) + (treatment == REG_REQ_ALREADY_SET && + !user_request->reload)) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; From 9d7482771fac8d8e38e763263f2ca0ca12dd22c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Nov 2021 17:54:04 +0300 Subject: [PATCH 029/361] tee: amdtee: fix an IS_ERR() vs NULL bug The __get_free_pages() function does not return error pointers it returns NULL so fix this condition to avoid a NULL dereference. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Signed-off-by: Dan Carpenter Acked-by: Rijo Thomas Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index da6b88e80dc07..297dc62bca298 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -203,9 +203,8 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta, *ta_size = roundup(fw->size, PAGE_SIZE); *ta = (void *)__get_free_pages(GFP_KERNEL, get_order(*ta_size)); - if (IS_ERR(*ta)) { - pr_err("%s: get_free_pages failed 0x%llx\n", __func__, - (u64)*ta); + if (!*ta) { + pr_err("%s: get_free_pages failed\n", __func__); rc = -ENOMEM; goto rel_fw; } From c2584017f757da3c84a743b607d6cfc763ebcc2b Mon Sep 17 00:00:00 2001 From: Vyacheslav Bocharov Date: Thu, 25 Nov 2021 16:02:47 +0300 Subject: [PATCH 030/361] arm64: meson: fix dts for JetHub D1 Fix misplace of cpu_cooling_maps for JetHub D1, move it to right place. Fixes: 8e279fb29039 ("arm64: dts: meson-axg: add support for JetHub D1") Signed-off-by: Vyacheslav Bocharov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20211125130246.1086627-1-adeep@lexina.in --- .../amlogic/meson-axg-jethome-jethub-j100.dts | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts index 52ebe371df268..561eec21b4deb 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts @@ -134,23 +134,23 @@ type = "critical"; }; }; - }; - cpu_cooling_maps: cooling-maps { - map0 { - trip = <&cpu_passive>; - cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; - }; + cpu_cooling_maps: cooling-maps { + map0 { + trip = <&cpu_passive>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; - map1 { - trip = <&cpu_hot>; - cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + map1 { + trip = <&cpu_hot>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; }; }; }; From 5ad77b1272fce36604779efe6e2036c500e6fe7a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 9 Jun 2021 22:20:09 +0200 Subject: [PATCH 031/361] arm64: meson: remove COMMON_CLK This reverts commit aea7a80ad5effd48f44a7a08c3903168be038a43. Selecting COMMON_CLK is not necessary, it is already selected by CONFIG_ARM64 Reported-by: Geert Uytterhoeven Signed-off-by: Jerome Brunet Reviewed-by: Kevin Hilman Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210609202009.1424879-1-jbrunet@baylibre.com --- arch/arm64/Kconfig.platforms | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 1aa8b70732186..54e3910e8b9bd 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -161,7 +161,6 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" - select COMMON_CLK help This enables support for the arm64 based Amlogic SoCs such as the s905, S905X/D, S912, A113X/D or S905X/D2 From 4356fd6041877ef0c91a2f6051b59bb1a8961ca2 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:39 +0100 Subject: [PATCH 032/361] dt-bindings: i2c: apple,i2c: allow multiple compatibles The intention was to have a SoC-specific and base compatible string to allow forward compatibility and SoC specific quirks, Fixes: df7c4a8c1b47 ("dt-bindings: i2c: Add Apple I2C controller bindings") Signed-off-by: Janne Grunau Cc: Mark Kettenis Reviewed-by: Sven Peter Reviewed-by: Mark Kettenis Tested-by: Hector Martin Acked-by: Wolfram Sang Acked-by: Rob Herring Signed-off-by: Hector Martin --- Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 22fc8483256f1..82b953181a522 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml @@ -20,9 +20,9 @@ allOf: properties: compatible: - enum: - - apple,t8103-i2c - - apple,i2c + items: + - const: apple,t8103-i2c + - const: apple,i2c reg: maxItems: 1 @@ -51,7 +51,7 @@ unevaluatedProperties: false examples: - | i2c@35010000 { - compatible = "apple,t8103-i2c"; + compatible = "apple,t8103-i2c", "apple,i2c"; reg = <0x35010000 0x4000>; interrupt-parent = <&aic>; interrupts = <0 627 4>; From 0668639eaf14813a39a8d3e0e6597d568581d4ea Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:40 +0100 Subject: [PATCH 033/361] arm64: dts: apple: add #interrupt-cells property to pinctrl nodes Required for devices trying to use pinctrl devices as interrupt controller. Fixes: 0a8282b83119 ("arm64: apple: Add pinctrl nodes") Signed-off-by: Janne Grunau Cc: Mark Kettenis Reviewed-by: Sven Peter Reviewed-by: Mark Kettenis Tested-by: Hector Martin Signed-off-by: Hector Martin --- arch/arm64/boot/dts/apple/t8103.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index fc8b2bb06ffe8..c320c8baeb417 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -143,6 +143,7 @@ apple,npins = <212>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -169,6 +170,7 @@ apple,npins = <42>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -189,6 +191,7 @@ apple,npins = <23>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -209,6 +212,7 @@ apple,npins = <16>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , From 973e5245637accc4002843f6b888495a6a7762bc Mon Sep 17 00:00:00 2001 From: Hu Weiwen Date: Mon, 22 Nov 2021 22:22:12 +0800 Subject: [PATCH 034/361] ceph: fix duplicate increment of opened_inodes metric opened_inodes is incremented twice when the same inode is opened twice with O_RDONLY and O_WRONLY respectively. To reproduce, run this python script, then check the metrics: import os for _ in range(10000): fd_r = os.open('a', os.O_RDONLY) fd_w = os.open('a', os.O_WRONLY) os.close(fd_r) os.close(fd_w) Fixes: 1dd8d4708136 ("ceph: metrics for opened files, pinned caps and opened inodes") Signed-off-by: Hu Weiwen Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b9460b6fb76f7..c447fa2e2d1fe 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4350,7 +4350,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; - bool is_opened = false; + bool already_opened = false; int i; if (count == 1) @@ -4358,19 +4358,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { - if (bits & (1 << i)) - ci->i_nr_by_mode[i] += count; - /* - * If any of the mode ref is larger than 1, + * If any of the mode ref is larger than 0, * that means it has been already opened by * others. Just skip checking the PIN ref. */ - if (i && ci->i_nr_by_mode[i] > 1) - is_opened = true; + if (i && ci->i_nr_by_mode[i]) + already_opened = true; + + if (bits & (1 << i)) + ci->i_nr_by_mode[i] += count; } - if (!is_opened) + if (!already_opened) percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } From e485d028bb1075d6167558b47f63e10713ad2034 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 23 Nov 2021 07:30:38 -0500 Subject: [PATCH 035/361] ceph: initialize i_size variable in ceph_sync_read Newer compilers seem to determine that this variable being uninitialized isn't a problem, but older compilers (from the RHEL8 era) seem to choke on it and complain that it could be used uninitialized. Go ahead and initialize the variable at declaration time to silence potential compiler warnings. Fixes: c3d8e0b5de48 ("ceph: return the real size read when it hits EOF") Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 02a0a0fd9ccd5..b24442e27e4e0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -847,7 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ssize_t ret; u64 off = iocb->ki_pos; u64 len = iov_iter_count(to); - u64 i_size; + u64 i_size = i_size_read(inode); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); From ee2a095d3b24f300a5e11944d208801e928f108c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Nov 2021 19:20:34 +0800 Subject: [PATCH 036/361] ceph: initialize pathlen variable in reconnect_caps_cb The smatch static checker warned about an uninitialized symbol usage in this function, in the case where ceph_mdsc_build_path returns an error. It turns out that that case is harmless, but it just looks sketchy. Initialize the variable at declaration time, and remove the unneeded setting of it later. Fixes: a33f6432b3a6 ("ceph: encode inodes' parent/d_name in cap reconnect message") Reported-by: Dan Carpenter Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 250aad330a106..c30eefc0ac193 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3683,7 +3683,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; char *path; - int pathlen, err; + int pathlen = 0, err; u64 pathbase; u64 snap_follows; @@ -3703,7 +3703,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, } } else { path = NULL; - pathlen = 0; pathbase = 0; } From fd84bfdddd169c219c3a637889a8b87f70a072c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Nov 2021 12:16:39 +0100 Subject: [PATCH 037/361] ceph: fix up non-directory creation in SGID directories Ceph always inherits the SGID bit if it is set on the parent inode, while the generic inode_init_owner does not do this in a few cases where it can create a possible security problem (cf. [1]). Update ceph to strip the SGID bit just as inode_init_owner would. This bug was detected by the mapped mount testsuite in [3]. The testsuite tests all core VFS functionality and semantics with and without mapped mounts. That is to say it functions as a generic VFS testsuite in addition to a mapped mount testsuite. While working on mapped mount support for ceph, SIGD inheritance was the only failing test for ceph after the port. The same bug was detected by the mapped mount testsuite in XFS in January 2021 (cf. [2]). [1]: commit 0fa3ecd87848 ("Fix up non-directory creation in SGID directories") [2]: commit 01ea173e103e ("xfs: fix up non-directory creation in SGID directories") [3]: https://git.kernel.org/fs/xfs/xfstests-dev.git Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b24442e27e4e0..c138e8126286c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -605,13 +605,25 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino); in.cap.flags = CEPH_CAP_FLAG_AUTH; in.ctime = in.mtime = in.atime = iinfo.btime; - in.mode = cpu_to_le32((u32)mode); in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); - in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ? - dir->i_gid : current_fsgid())); + if (dir->i_mode & S_ISGID) { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); + + /* Directories always inherit the setgid bit. */ + if (S_ISDIR(mode)) + mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(dir->i_gid) && + !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID)) + mode &= ~S_ISGID; + } else { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + } + in.mode = cpu_to_le32((u32)mode); + in.nlink = cpu_to_le32(1); in.max_size = cpu_to_le64(lo->stripe_unit); From cb25b11943cbcc5a34531129952870420f8be858 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 1 Nov 2021 19:36:30 -0500 Subject: [PATCH 038/361] ARM: socfpga: dts: fix qspi node compatible The QSPI flash node needs to have the required "jedec,spi-nor" in the compatible string. Fixes: 1df99da8953 ("ARM: dts: socfpga: Enable QSPI in Arria10 devkit") Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts | 2 +- arch/arm/boot/dts/socfpga_arria5_socdk.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_socdk.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_sockit.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_socrates.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_sodia.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts index 2b645642b9352..2a745522404d6 100644 --- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts +++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts @@ -12,7 +12,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00aa"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts index 90e676e7019f2..1b02d46496a85 100644 --- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts @@ -119,7 +119,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts index 6f138b2b26163..51bb436784e24 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts @@ -124,7 +124,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index c155ff02eb6e0..cae9ddd5ed38b 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -169,7 +169,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts index 8d5d3996f6f27..ca18b959e6559 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts @@ -80,7 +80,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts index 99a71757cdf46..3f7aa7bf0863a 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts @@ -116,7 +116,7 @@ flash0: n25q512a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q512a"; + compatible = "micron,n25q512a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a060718758b67..25874e1b9c829 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -224,7 +224,7 @@ n25q128@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q128"; + compatible = "micron,n25q128", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; @@ -241,7 +241,7 @@ n25q00@1 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <1>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; From cd57eb3c403cb864e5558874ecd57dd954a5a7f7 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:41 +0200 Subject: [PATCH 039/361] ASoC: SOF: Intel: pci-tgl: add ADL-N support Add PCI DID for Intel AlderLake-N. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index f2ea34df9741d..302068fd0b810 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -114,6 +114,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ + .driver_data = (unsigned long)&adl_desc}, { 0, } }; MODULE_DEVICE_TABLE(pci, sof_pci_ids); From de7dd9092cd38384f774d345cccafe81b4b866b0 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:42 +0200 Subject: [PATCH 040/361] ASoC: SOF: Intel: pci-tgl: add new ADL-P variant Add a PCI DID for a variant of Intel AlderLake-P. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index 302068fd0b810..fd46210f17303 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -112,6 +112,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adls_desc}, { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x51cd), /* ADL-P */ + .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ From 85223d609c99eaa07cc598632b426cb33753526f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 6 Dec 2021 13:43:06 +0100 Subject: [PATCH 041/361] regulator: dt-bindings: samsung,s5m8767: add missing op_mode to bucks While converting bindings to dtschema, the buck regulators lost "op_mode" property. The "op_mode" is a valid property for all regulators (both LDOs and bucks), so add it. Reported-by: Rob Herring Fixes: fab58debc137 ("regulator: dt-bindings: samsung,s5m8767: convert to dtschema") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211206124306.14006-1-krzysztof.kozlowski@canonical.com Signed-off-by: Mark Brown --- .../bindings/regulator/samsung,s5m8767.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml index 80a63d47790a2..c98929a213e93 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml @@ -51,6 +51,19 @@ patternProperties: description: Properties for single BUCK regulator. + properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + required: - regulator-name @@ -63,6 +76,18 @@ patternProperties: Properties for single BUCK regulator. properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + s5m8767,pmic-ext-control-gpios: maxItems: 1 description: | From db6689b643d8653092f5853751ea2cdbc299f8d3 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 6 Dec 2021 18:19:31 +0800 Subject: [PATCH 042/361] spi: change clk_disable_unprepare to clk_unprepare The corresponding API for clk_prepare is clk_unprepare, other than clk_disable_unprepare. Fix this by changing clk_disable_unprepare to clk_unprepare. Fixes: 5762ab71eb24 ("spi: Add support for Armada 3700 SPI Controller") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20211206101931.2816597-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 46feafe4e201c..d8cc4b270644a 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: From 44ee250aeeabb28b52a10397ac17ffb8bfe94839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C4=8Cavoj?= Date: Sat, 4 Dec 2021 13:17:36 -0800 Subject: [PATCH 043/361] Input: i8042 - enable deferred probe quirk for ASUS UM325UA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS UM325UA suffers from the same issue as the ASUS UX425UA, which is a very similar laptop. The i8042 device is not usable immediately after boot and fails to initialize, requiring a deferred retry. Enable the deferred probe quirk for the UM325UA. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211204015615.232948-1-samuel@cavoj.net Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 1acc7c8449294..148a7c5fd0e22 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -1003,6 +1003,13 @@ static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), }, }, + { + /* ASUS ZenBook UM325UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + }, + }, { } }; From a2fd46cd3dbb83b373ba74f4043f8dae869c65f1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:15:09 -0800 Subject: [PATCH 044/361] Input: goodix - try not to touch the reset-pin on x86/ACPI devices Unless the controller is not responding at boot or after suspend/resume, the driver never resets the controller on x86/ACPI platforms. The driver still requesting the reset pin at probe() though in case it needs it. Until now the driver has always requested the reset pin with GPIOD_IN as type. The idea being to put the pin in high-impedance mode to save power until the driver actually wants to issue a reset. But this means that just requesting the pin can cause issues, since requesting it in another mode then GPIOD_ASIS may cause the pinctrl driver to touch the pin settings. We have already had issues before due to a bug in the pinctrl-cherryview.c driver which has been fixed in commit 921daeeca91b ("pinctrl: cherryview: Preserve CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs"). And now it turns out that requesting the reset-pin as GPIOD_IN also stops the touchscreen from working on the GPD P2 max mini-laptop. The behavior of putting the pin in high-impedance mode relies on there being some external pull-up to keep it high and there seems to be no pull-up on the GPD P2 max, causing things to break. This commit fixes this by requesting the reset pin as is when using the x86/ACPI code paths to lookup the GPIOs; and by not dropping it back into input-mode in case the driver does end up issuing a reset for error-recovery. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209061 Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206091116.44466-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 30 +++++++++++++++++++++++++----- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 906b5a6b52d17..e7efc32043e7c 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -695,10 +695,16 @@ int goodix_reset_no_int_sync(struct goodix_ts_data *ts) usleep_range(6000, 10000); /* T4: > 5ms */ - /* end select I2C slave addr */ - error = gpiod_direction_input(ts->gpiod_rst); - if (error) - goto error; + /* + * Put the reset pin back in to input / high-impedance mode to save + * power. Only do this in the non ACPI case since some ACPI boards + * don't have a pull-up, so there the reset pin must stay active-high. + */ + if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_GPIO) { + error = gpiod_direction_input(ts->gpiod_rst); + if (error) + goto error; + } return 0; @@ -832,6 +838,14 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) return -EINVAL; } + /* + * Normally we put the reset pin in input / high-impedance mode to save + * power. But some x86/ACPI boards don't have a pull-up, so for the ACPI + * case, leave the pin as is. This results in the pin not being touched + * at all on x86/ACPI boards, except when needed for error-recover. + */ + ts->gpiod_rst_flags = GPIOD_ASIS; + return devm_acpi_dev_add_driver_gpios(dev, gpio_mapping); } #else @@ -857,6 +871,12 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) return -EINVAL; dev = &ts->client->dev; + /* + * By default we request the reset pin as input, leaving it in + * high-impedance when not resetting the controller to save power. + */ + ts->gpiod_rst_flags = GPIOD_IN; + ts->avdd28 = devm_regulator_get(dev, "AVDD28"); if (IS_ERR(ts->avdd28)) { error = PTR_ERR(ts->avdd28); @@ -894,7 +914,7 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) ts->gpiod_int = gpiod; /* Get the reset line GPIO pin number */ - gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, GPIOD_IN); + gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags); if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 62138f930d1aa..02065d1c32635 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -87,6 +87,7 @@ struct goodix_ts_data { struct gpio_desc *gpiod_rst; int gpio_count; int gpio_int_idx; + enum gpiod_flags gpiod_rst_flags; char id[GOODIX_ID_MAX_LEN + 1]; char cfg_name[64]; u16 version; From 81e818869be522bc8fa6f7df1b92d7e76537926c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:29:27 -0800 Subject: [PATCH 045/361] Input: goodix - add id->model mapping for the "9111" model Add d->model mapping for the "9111" model, this fixes uses using a wrong config_len of 240 bytes while the "9111" model uses only 186 bytes of config. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206164747.197309-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index e7efc32043e7c..87263eb9e5a43 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -102,6 +102,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "911", .data = >911_chip_data }, { .id = "9271", .data = >911_chip_data }, { .id = "9110", .data = >911_chip_data }, + { .id = "9111", .data = >911_chip_data }, { .id = "927", .data = >911_chip_data }, { .id = "928", .data = >911_chip_data }, From 06d59d626a0a49c7bfeae52aa6f793538209f2fc Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 1 Dec 2021 15:39:52 +0200 Subject: [PATCH 046/361] MAINTAINERS: update Kalle Valo's email I switched to using kvalo@kernel.org, update MAINTAINERS file. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211201133952.31744-1-kvalo@kernel.org --- MAINTAINERS | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce85213..22bea147c7d80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,7 +3056,7 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/ath/* @@ -3071,7 +3071,7 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/ath5k F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath6kl @@ -13238,7 +13238,7 @@ F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h NETWORKING DRIVERS (WIRELESS) -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-wireless/list/ @@ -15694,7 +15694,7 @@ T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -15702,7 +15702,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath10k/ QUALCOMM ATHEROS ATH11K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath11k@lists.infradead.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -15866,7 +15866,7 @@ F: Documentation/devicetree/bindings/media/*venus* F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: wcn36xx@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx From 54baf56eaa40aa5cdcd02b3c20d593e4e1211220 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Mon, 8 Nov 2021 20:34:38 -0800 Subject: [PATCH 047/361] clk: Don't parent clks until the parent is fully registered Before commit fc0c209c147f ("clk: Allow parents to be specified without string names") child clks couldn't find their parent until the parent clk was added to a list in __clk_core_init(). After that commit, child clks can reference their parent clks directly via a clk_hw pointer, or they can lookup that clk_hw pointer via DT if the parent clk is registered with an OF clk provider. The common clk framework treats hw->core being non-NULL as "the clk is registered" per the logic within clk_core_fill_parent_index(): parent = entry->hw->core; /* * We have a direct reference but it isn't registered yet? * Orphan it and let clk_reparent() update the orphan status * when the parent is registered. */ if (!parent) Therefore we need to be extra careful to not set hw->core until the clk is fully registered with the clk framework. Otherwise we can get into a situation where a child finds a parent clk and we move the child clk off the orphan list when the parent isn't actually registered, wrecking our enable accounting and breaking critical clks. Consider the following scenario: CPU0 CPU1 ---- ---- struct clk_hw clkBad; struct clk_hw clkA; clkA.init.parent_hws = { &clkBad }; clk_hw_register(&clkA) clk_hw_register(&clkBad) ... __clk_register() hw->core = core ... __clk_register() __clk_core_init() clk_prepare_lock() __clk_init_parent() clk_core_get_parent_by_index() clk_core_fill_parent_index() if (entry->hw) { parent = entry->hw->core; At this point, 'parent' points to clkBad even though clkBad hasn't been fully registered yet. Ouch! A similar problem can happen if a clk controller registers orphan clks that are referenced in the DT node of another clk controller. Let's fix all this by only setting the hw->core pointer underneath the clk prepare lock in __clk_core_init(). This way we know that clk_core_fill_parent_index() can't see hw->core be non-NULL until the clk is fully registered. Fixes: fc0c209c147f ("clk: Allow parents to be specified without string names") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20211109043438.4639-1-quic_mdtipton@quicinc.com [sboyd@kernel.org: Reword commit text, update comment] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f467d63bbf1ee..566ee2c78709e 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3418,6 +3418,14 @@ static int __clk_core_init(struct clk_core *core) clk_prepare_lock(); + /* + * Set hw->core after grabbing the prepare_lock to synchronize with + * callers of clk_core_fill_parent_index() where we treat hw->core + * being NULL as the clk not being registered yet. This is crucial so + * that clks aren't parented until their parent is fully registered. + */ + core->hw->core = core; + ret = clk_pm_runtime_get(core); if (ret) goto unlock; @@ -3582,8 +3590,10 @@ static int __clk_core_init(struct clk_core *core) out: clk_pm_runtime_put(core); unlock: - if (ret) + if (ret) { hlist_del_init(&core->child_node); + core->hw->core = NULL; + } clk_prepare_unlock(); @@ -3847,7 +3857,6 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) core->num_parents = init->num_parents; core->min_rate = 0; core->max_rate = ULONG_MAX; - hw->core = core; ret = clk_core_populate_parent_map(core, init); if (ret) @@ -3865,7 +3874,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_create_clk; } - clk_core_link_consumer(hw->core, hw->clk); + clk_core_link_consumer(core, hw->clk); ret = __clk_core_init(core); if (!ret) From 34f35f8f14bc406efc06ee4ff73202c6fd245d15 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Wed, 8 Dec 2021 10:32:39 +0100 Subject: [PATCH 048/361] ipmi: ssif: initialize ssif_info->client early During probe ssif_info->client is dereferenced in error path. However, it is set when some of the error checking has already been done. This causes following kernel crash if an error path is taken: [ 30.645593][ T674] ipmi_ssif 0-000e: ipmi_ssif: Not probing, Interface already present [ 30.657616][ T674] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088 ... [ 30.657723][ T674] pc : __dev_printk+0x28/0xa0 [ 30.657732][ T674] lr : _dev_err+0x7c/0xa0 ... [ 30.657772][ T674] Call trace: [ 30.657775][ T674] __dev_printk+0x28/0xa0 [ 30.657778][ T674] _dev_err+0x7c/0xa0 [ 30.657781][ T674] ssif_probe+0x548/0x900 [ipmi_ssif 62ce4b08badc1458fd896206d9ef69a3c31f3d3e] [ 30.657791][ T674] i2c_device_probe+0x37c/0x3c0 ... Initialize ssif_info->client before any error path can be taken. Clear i2c_client data in the error path to prevent the dangling pointer from leaking. Fixes: c4436c9149c5 ("ipmi_ssif: avoid registering duplicate ssif interface") Cc: stable@vger.kernel.org # 5.4.x Suggested-by: Takashi Iwai Signed-off-by: Mian Yousaf Kaukab Message-Id: <20211208093239.4432-1-ykaukab@suse.de> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0c62e578749ef..48aab77abebf1 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1659,6 +1659,9 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } } + ssif_info->client = client; + i2c_set_clientdata(client, ssif_info); + rv = ssif_check_and_remove(client, ssif_info); /* If rv is 0 and addr source is not SI_ACPI, continue probing */ if (!rv && ssif_info->addr_source == SI_ACPI) { @@ -1679,9 +1682,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) ipmi_addr_src_to_str(ssif_info->addr_source), client->addr, client->adapter->name, slave_addr); - ssif_info->client = client; - i2c_set_clientdata(client, ssif_info); - /* Now check for system interface capabilities */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD; @@ -1881,6 +1881,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_err(&ssif_info->client->dev, "Unable to start IPMI SSIF: %d\n", rv); + i2c_set_clientdata(client, NULL); kfree(ssif_info); } kfree(resp); From d599f714b73e4177dfdfe64fce09175568288ee9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Dec 2021 14:04:24 +0200 Subject: [PATCH 049/361] iwlwifi: mvm: don't crash on invalid rate w/o STA If we get to the WARN_ONCE(..., "Got a HT rate (...)", ...) here with a NULL sta, then we crash because mvmsta is bad and we try to dereference it. Fix that by printing -1 as the state if no station was given. Signed-off-by: Johannes Berg Fixes: 6761a718263a ("iwlwifi: mvm: add explicit check for non-data frames in get Tx rate") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20211203140410.1a1541d7dcb5.I606c746e11447fe168cf046376b70b04e278c3b4@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index bdd4ee4325483..76e0b7b45980d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -269,17 +269,18 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, u8 rate_plcp; u32 rate_flags = 0; bool is_cck; - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); /* info->control is only relevant for non HW rate control */ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) { + struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + /* HT rate doesn't make sense for a non data frame */ WARN_ONCE(info->control.rates[0].flags & IEEE80211_TX_RC_MCS && !ieee80211_is_data(fc), "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n", info->control.rates[0].flags, info->control.rates[0].idx, - le16_to_cpu(fc), mvmsta->sta_state); + le16_to_cpu(fc), sta ? mvmsta->sta_state : -1); rate_idx = info->control.rates[0].idx; } From efdbfa0ad03e764419378485d1b8f6e7706fb1a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:33 +0100 Subject: [PATCH 050/361] iwlwifi: fix LED dependencies The dependencies for LED configuration are highly inconsistent and too complicated at the moment. One of the results is a randconfig failure I get very rarely when LEDS_CLASS is in a loadable module, but the wireless core is built-in: WARNING: unmet direct dependencies detected for MAC80211_LEDS Depends on [n]: NET [=y] && WIRELESS [=y] && MAC80211 [=y] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=MAC80211 [=y]) Selected by [m]: - IWLEGACY [=m] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] - IWLWIFI_LEDS [=y] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] && IWLWIFI [=m] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=IWLWIFI [=m]) && (IWLMVM [=m] || IWLDVM [=m]) aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_register_led': led.c:(.text+0x60): undefined reference to `led_classdev_register_ext' aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_unregister_leds': led.c:(.text+0x200): undefined reference to `led_classdev_unregister' For iwlwifi, the dependency is wrong, since this config prevents the MAC80211_LEDS code from being part of a built-in MAC80211 driver. For iwlegacy, this is worse because the driver tries to force-enable the other subsystems, which is both a layering violation and a bug because it will still fail with MAC80211=y and IWLEGACY=m, leading to LEDS_CLASS being a module as well. The actual link failure in the ath5k driver is a result of MAC80211_LEDS being enabled but not usable. With the Kconfig logic fixed in the Intel drivers, the ath5k driver works as expected again. Signed-off-by: Arnd Bergmann Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-1-arnd@kernel.org --- drivers/net/wireless/intel/iwlegacy/Kconfig | 4 ++-- drivers/net/wireless/intel/iwlwifi/Kconfig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/Kconfig b/drivers/net/wireless/intel/iwlegacy/Kconfig index 24fe3f63c3215..7eacc8e58ee14 100644 --- a/drivers/net/wireless/intel/iwlegacy/Kconfig +++ b/drivers/net/wireless/intel/iwlegacy/Kconfig @@ -2,14 +2,13 @@ config IWLEGACY tristate select FW_LOADER - select NEW_LEDS - select LEDS_CLASS select LEDS_TRIGGERS select MAC80211_LEDS config IWL4965 tristate "Intel Wireless WiFi 4965AGN (iwl4965)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help This option enables support for @@ -38,6 +37,7 @@ config IWL4965 config IWL3945 tristate "Intel PRO/Wireless 3945ABG/BG Network Connection (iwl3945)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help Select to build the driver supporting the: diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 1085afbefba87..418ae4f870ab7 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -47,7 +47,7 @@ if IWLWIFI config IWLWIFI_LEDS bool - depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 depends on IWLMVM || IWLDVM select LEDS_TRIGGERS select MAC80211_LEDS From c68115fc537518b8ff2c54b3e2984a60977affed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:34 +0100 Subject: [PATCH 051/361] brcmsmac: rework LED dependencies This is now the only driver that selects the LEDS_CLASS framework, which is normally user-selectable. While it doesn't strictly cause a bug, rework the Kconfig logic to be more consistent with what other drivers do, and only enable LED support in brcmsmac if the dependencies are all there, rather than using 'select' to enable what it needs. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-2-arnd@kernel.org --- drivers/net/wireless/broadcom/brcm80211/Kconfig | 14 +++++++++----- .../wireless/broadcom/brcm80211/brcmsmac/Makefile | 2 +- .../net/wireless/broadcom/brcm80211/brcmsmac/led.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/Kconfig b/drivers/net/wireless/broadcom/brcm80211/Kconfig index 5bf2318763c55..3a1a35b5672f1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/Kconfig +++ b/drivers/net/wireless/broadcom/brcm80211/Kconfig @@ -7,16 +7,20 @@ config BRCMSMAC depends on MAC80211 depends on BCMA_POSSIBLE select BCMA - select NEW_LEDS if BCMA_DRIVER_GPIO - select LEDS_CLASS if BCMA_DRIVER_GPIO select BRCMUTIL select FW_LOADER select CORDIC help This module adds support for PCIe wireless adapters based on Broadcom - IEEE802.11n SoftMAC chipsets. It also has WLAN led support, which will - be available if you select BCMA_DRIVER_GPIO. If you choose to build a - module, the driver will be called brcmsmac.ko. + IEEE802.11n SoftMAC chipsets. If you choose to build a module, the + driver will be called brcmsmac.ko. + +config BRCMSMAC_LEDS + def_bool BRCMSMAC && BCMA_DRIVER_GPIO && MAC80211_LEDS + help + The brcmsmac LED support depends on the presence of the + BCMA_DRIVER_GPIO driver, and it only works if LED support + is enabled and reachable from the driver module. source "drivers/net/wireless/broadcom/brcm80211/brcmfmac/Kconfig" diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile index 482d7737764da..090757730ba60 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile @@ -42,6 +42,6 @@ brcmsmac-y := \ brcms_trace_events.o \ debug.o -brcmsmac-$(CONFIG_BCMA_DRIVER_GPIO) += led.o +brcmsmac-$(CONFIG_BRCMSMAC_LEDS) += led.o obj-$(CONFIG_BRCMSMAC) += brcmsmac.o diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h index d65f5c268fd77..2a5cbeb9e7831 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h @@ -24,7 +24,7 @@ struct brcms_led { struct gpio_desc *gpiod; }; -#ifdef CONFIG_BCMA_DRIVER_GPIO +#ifdef CONFIG_BRCMSMAC_LEDS void brcms_led_unregister(struct brcms_info *wl); int brcms_led_register(struct brcms_info *wl); #else From f7d55d2e439fa4430755d69a5d7ad16d43a5ebe6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:35 +0100 Subject: [PATCH 052/361] mt76: mt7921: fix build regression After mt7921s got added, there are two possible build problems: a) mt7921s does not get built at all if mt7921e is not also enabled b) there is a link error when mt7921e is a loadable module, but mt7921s configured as built-in: ERROR: modpost: "mt7921_mac_sta_add" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_assoc" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_remove" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_write_txwi" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_drv_pmctrl" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_fill_message" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_parse_response" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_ops" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_queue_rx_skb" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_update_channel" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! Fix both by making sure that Kbuild enters the subdirectory when either one is enabled. Fixes: 48fab5bbef40 ("mt76: mt7921: introduce mt7921s support") Signed-off-by: Arnd Bergmann Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-3-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile index 79ab850a45a28..c78ae4b897619 100644 --- a/drivers/net/wireless/mediatek/mt76/Makefile +++ b/drivers/net/wireless/mediatek/mt76/Makefile @@ -34,4 +34,4 @@ obj-$(CONFIG_MT76x2_COMMON) += mt76x2/ obj-$(CONFIG_MT7603E) += mt7603/ obj-$(CONFIG_MT7615_COMMON) += mt7615/ obj-$(CONFIG_MT7915E) += mt7915/ -obj-$(CONFIG_MT7921E) += mt7921/ +obj-$(CONFIG_MT7921_COMMON) += mt7921/ From ff9f9c6e74848170fcb45c8403c80d661484c8c9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 13:33:07 +0300 Subject: [PATCH 053/361] vduse: fix memory corruption in vduse_dev_ioctl() The "config.offset" comes from the user. There needs to a check to prevent it being out of bounds. The "config.offset" and "dev->config_size" variables are both type u32. So if the offset if out of bounds then the "dev->config_size - config.offset" subtraction results in a very high u32 value. The out of bounds offset can result in memory corruption. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208103307.GA3778@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index c9204c62f339c..1a206f95d73a8 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -975,7 +975,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; ret = -EINVAL; - if (config.length == 0 || + if (config.offset > dev->config_size || + config.length == 0 || config.length > dev->config_size - config.offset) break; From 3ed21c1451a14d139e1ceb18f2fa70865ce3195a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 13:33:37 +0300 Subject: [PATCH 054/361] vdpa: check that offsets are within bounds In this function "c->off" is a u32 and "size" is a long. On 64bit systems if "c->off" is greater than "size" then "size - c->off" is a negative and we always return -E2BIG. But on 32bit systems the subtraction is type promoted to a high positive u32 value and basically any "c->len" is accepted. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Reported-by: Xie Yongji Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208103337.GA4047@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 29cced1cd2778..e3c4f059b21a2 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -197,7 +197,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vdpa_device *vdpa = v->vdpa; long size = vdpa->config->get_config_size(vdpa); - if (c->len == 0) + if (c->len == 0 || c->off > size) return -EINVAL; if (c->len > size - c->off) From dc1db0060c02d119fd4196924eff2d1129e9a442 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 18:09:56 +0300 Subject: [PATCH 055/361] vduse: check that offset is within bounds in get_config() This condition checks "len" but it does not check "offset" and that could result in an out of bounds read if "offset > dev->config_size". The problem is that since both variables are unsigned the "dev->config_size - offset" subtraction would result in a very high unsigned value. I think these checks might not be necessary because "len" and "offset" are supposed to already have been validated using the vhost_vdpa_config_validate() function. But I do not know the code perfectly, and I like to be safe. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208150956.GA29160@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 1a206f95d73a8..eddcb64a910ac 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -655,7 +655,8 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vduse_dev *dev = vdpa_to_vduse(vdpa); - if (len > dev->config_size - offset) + if (offset > dev->config_size || + len > dev->config_size - offset) return; memcpy(buf, dev->config + offset, len); From 27d9839f17940e8edc475df616bbd9cf7ede8d05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Dec 2021 09:50:18 +0100 Subject: [PATCH 056/361] virtio: always enter drivers/virtio/ When neither VIRTIO_PCI_LIB nor VIRTIO are enabled, but the alibaba vdpa driver is, the kernel runs into a link error because the legacy virtio module never gets built: x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_features': eni_vdpa.c:(.text+0x23f): undefined reference to `vp_legacy_set_features' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_state': eni_vdpa.c:(.text+0x2fe): undefined reference to `vp_legacy_get_queue_enable' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_address': eni_vdpa.c:(.text+0x376): undefined reference to `vp_legacy_set_queue_address' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_ready': eni_vdpa.c:(.text+0x3b4): undefined reference to `vp_legacy_set_queue_address' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_free_irq': eni_vdpa.c:(.text+0x460): undefined reference to `vp_legacy_queue_vector' x86_64-linux-ld: eni_vdpa.c:(.text+0x4b7): undefined reference to `vp_legacy_config_vector' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_reset': When VIRTIO_PCI_LIB was added, it was correctly added to drivers/Makefile as well, but for the legacy module, this is missing. Solve this by always entering drivers/virtio during the build and letting its Makefile take care of the individual options, rather than having a separate line for each sub-option. Fixes: 64b9f64f80a6 ("vdpa: introduce virtio pci driver") Fixes: e85087beedca ("eni_vdpa: add vDPA driver for Alibaba ENI") Fixes: d89c8169bd70 ("virtio-pci: introduce legacy device module") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211206085034.2836099-1-arnd@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index be5d40ae14882..a110338c860c7 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -41,8 +41,7 @@ obj-$(CONFIG_DMADEVICES) += dma/ # SOC specific infrastructure drivers. obj-y += soc/ -obj-$(CONFIG_VIRTIO) += virtio/ -obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/ +obj-y += virtio/ obj-$(CONFIG_VDPA) += vdpa/ obj-$(CONFIG_XEN) += xen/ From 817fc978b5a29b039db0418a91072b31c9aab152 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2021 11:20:18 +0000 Subject: [PATCH 057/361] virtio_ring: Fix querying of maximum DMA mapping size for virtio device virtio_max_dma_size() returns the maximum DMA mapping size of the virtio device by querying dma_max_mapping_size() for the device when the DMA API is in use for the vring. Unfortunately, the device passed is initialised by register_virtio_device() and does not inherit the DMA configuration from its parent, resulting in SWIOTLB errors when bouncing is enabled and the default 256K mapping limit (IO_TLB_SEGSIZE) is not respected: | virtio-pci 0000:00:01.0: swiotlb buffer is full (sz: 294912 bytes), total 1024 (slots), used 725 (slots) Follow the pattern used elsewhere in the virtio_ring code when calling into the DMA layer and pass the parent device to dma_max_mapping_size() instead. Cc: Marc Zyngier Cc: Quentin Perret Cc: "Michael S. Tsirkin" Cc: Jason Wang Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20211201112018.25276-1-will@kernel.org Acked-by: Jason Wang Tested-by: Suzuki K Poulose Fixes: e6d6dd6c875e ("virtio: Introduce virtio_max_dma_size()") Cc: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: Christoph Hellwig Cc: Robin Murphy Signed-off-by: Steven Price Signed-off-by: Suzuki K Poulose Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6d2614e34470f..028b05d445460 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -268,7 +268,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev) size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(&vdev->dev); + max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } From 1db8f5fc2e5c66a5c51e1f6488e0ba7d45c29ae4 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 25 Nov 2021 20:18:23 -0500 Subject: [PATCH 058/361] virtio/vsock: fix the transport to work with VMADDR_CID_ANY The VMADDR_CID_ANY flag used by a socket means that the socket isn't bound to any specific CID. For example, a host vsock server may want to be bound with VMADDR_CID_ANY, so that a guest vsock client can connect to the host server with CID=VMADDR_CID_HOST (i.e. 2), and meanwhile, a host vsock client can connect to the same local server with CID=VMADDR_CID_LOCAL (i.e. 1). The current implementation sets the destination socket's svm_cid to a fixed CID value after the first client's connection, which isn't an expected operation. For example, if the guest client first connects to the host server, the server's svm_cid gets set to VMADDR_CID_HOST, then other host clients won't be able to connect to the server anymore. Reproduce steps: 1. Run the host server: socat VSOCK-LISTEN:1234,fork - 2. Run a guest client to connect to the host server: socat - VSOCK-CONNECT:2:1234 3. Run a host client to connect to the host server: socat - VSOCK-CONNECT:1:1234 Without this patch, step 3. above fails to connect, and socat complains "socat[1720] E connect(5, AF=40 cid:1 port:1234, 16): Connection reset by peer". With this patch, the above works well. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Wei Wang Link: https://lore.kernel.org/r/20211126011823.1760-1-wei.w.wang@intel.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- net/vmw_vsock/virtio_transport_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 59ee1be5a6dd3..ec2c2afbf0d06 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1299,7 +1299,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, space_available = virtio_transport_space_update(sk, pkt); /* Update CID in case it has changed after a transport reset event */ - vsk->local_addr.svm_cid = dst.svm_cid; + if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) + vsk->local_addr.svm_cid = dst.svm_cid; if (space_available) sk->sk_write_space(sk); From bb47620be322c5e9e372536cb6b54e17b3a00258 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 Nov 2021 06:29:49 +0200 Subject: [PATCH 059/361] vdpa: Consider device id larger than 31 virtio device id value can be more than 31. Hence, use BIT_ULL in assignment. Fixes: 33b347503f01 ("vdpa: Define vdpa mgmt device, ops and a netlink interface") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Parav Pandit Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211130042949.88958-1-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 7332a74a4b00c..09bbe53c3ac4e 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -404,7 +404,8 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m goto msg_err; while (mdev->id_table[i].device) { - supported_classes |= BIT(mdev->id_table[i].device); + if (mdev->id_table[i].device <= 63) + supported_classes |= BIT_ULL(mdev->id_table[i].device); i++; } From 09d97da660ff77df20984496aa0abcd6b88819f2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 8 Dec 2021 17:27:19 +0800 Subject: [PATCH 060/361] MIPS: Only define pci_remap_iospace() for Ralink After commit 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'"), there exists the following warning on the Loongson64 platform: loongson-pci 1a000000.pci: IO 0x0018020000..0x001803ffff -> 0x0000020000 loongson-pci 1a000000.pci: MEM 0x0040000000..0x007fffffff -> 0x0040000000 ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at arch/mips/pci/pci-generic.c:55 pci_remap_iospace+0x84/0x90 resource start address is not zero ... Call Trace: [] show_stack+0x40/0x120 [] dump_stack_lvl+0x58/0x74 [] __warn+0xe0/0x110 [] warn_slowpath_fmt+0xa4/0xd0 [] pci_remap_iospace+0x84/0x90 [] devm_pci_remap_iospace+0x5c/0xb8 [] devm_of_pci_bridge_init+0x178/0x1f8 [] devm_pci_alloc_host_bridge+0x78/0x98 [] loongson_pci_probe+0x34/0x160 [] platform_probe+0x6c/0xe0 [] really_probe+0xbc/0x340 [] __driver_probe_device+0x98/0x110 [] driver_probe_device+0x50/0x118 [] __driver_attach+0x80/0x118 [] bus_for_each_dev+0x80/0xc8 [] bus_add_driver+0x130/0x210 [] driver_register+0x8c/0x150 [] do_one_initcall+0x54/0x288 [] kernel_init_freeable+0x27c/0x2e4 [] kernel_init+0x2c/0x134 [] ret_from_kernel_thread+0x14/0x1c ---[ end trace e4a0efe10aa5cce6 ]--- loongson-pci 1a000000.pci: error -19: failed to map resource [io 0x20000-0x3ffff] We can see that the resource start address is 0x0000020000, because the ISA Bridge used the zero address which is defined in the dts file arch/mips/boot/dts/loongson/ls7a-pch.dtsi: ISA Bridge: /bus@10000000/isa@18000000 IO 0x0000000018000000..0x000000001801ffff -> 0x0000000000000000 Based on the above analysis, the architecture-specific pci_remap_iospace() is not suitable for Loongson64, we should only define pci_remap_iospace() for Ralink on MIPS based on the commit background. Fixes: 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'") Suggested-by: Thomas Bogendoerfer Signed-off-by: Tiezhu Yang Tested-by: Sergio Paracuellos Acked-by: Sergio Paracuellos Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-ralink/spaces.h | 2 ++ arch/mips/include/asm/pci.h | 4 ---- arch/mips/pci/pci-generic.c | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/mach-ralink/spaces.h b/arch/mips/include/asm/mach-ralink/spaces.h index 05d14c21c4178..f7af11ea2d612 100644 --- a/arch/mips/include/asm/mach-ralink/spaces.h +++ b/arch/mips/include/asm/mach-ralink/spaces.h @@ -6,5 +6,7 @@ #define PCI_IOSIZE SZ_64K #define IO_SPACE_LIMIT (PCI_IOSIZE - 1) +#define pci_remap_iospace pci_remap_iospace + #include #endif diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 421231f55935a..9ffc8192adae8 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -20,10 +20,6 @@ #include #include -#ifdef CONFIG_PCI_DRIVERS_GENERIC -#define pci_remap_iospace pci_remap_iospace -#endif - #ifdef CONFIG_PCI_DRIVERS_LEGACY /* diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c index 18eb8a453a862..d2d68bac3d250 100644 --- a/arch/mips/pci/pci-generic.c +++ b/arch/mips/pci/pci-generic.c @@ -47,6 +47,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } +#ifdef pci_remap_iospace int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) { unsigned long vaddr; @@ -60,3 +61,4 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) set_io_port_base(vaddr); return 0; } +#endif From 842470c4e211f284a224842849b1fa81b130c154 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 20 Oct 2021 18:57:40 +0200 Subject: [PATCH 061/361] Revert "drm/fb-helper: improve DRM fbdev emulation device names" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b3484d2b03e4c940a9598aa841a52d69729c582a. That change attempted to improve the DRM drivers fbdev emulation device names to avoid having confusing names like "simpledrmdrmfb" in /proc/fb. But unfortunately, there are user-space programs such as pm-utils that match against the fbdev names and so broke after the mentioned commit. Since the names in /proc/fb are used by tools that consider it an uAPI, let's restore the old names even when this lead to silly names like the one mentioned above. Fixes: b3484d2b03e4 ("drm/fb-helper: improve DRM fbdev emulation device names") Reported-by: Johannes Stezenbach Signed-off-by: Javier Martinez Canillas Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211020165740.3011927-1-javierm@redhat.com --- drivers/gpu/drm/drm_fb_helper.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 8e7a124d6c5a3..22bf690910b25 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1743,7 +1743,13 @@ void drm_fb_helper_fill_info(struct fb_info *info, sizes->fb_width, sizes->fb_height); info->par = fb_helper; - snprintf(info->fix.id, sizeof(info->fix.id), "%s", + /* + * The DRM drivers fbdev emulation device name can be confusing if the + * driver name also has a "drm" suffix on it. Leading to names such as + * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't + * be changed due user-space tools (e.g: pm-utils) matching against it. + */ + snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", fb_helper->dev->driver->name); } From 59ec71575ab440cd5ca0aa53b2a2985b3639fad4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 29 Nov 2021 21:37:25 +0100 Subject: [PATCH 062/361] ucounts: Fix rlimit max values check The semantics of the rlimit max values differs from ucounts itself. When creating a new userns, we store the current rlimit of the process in ucount_max. Thus, the value of the limit in the parent userns is saved in the created one. The problem is that now we are taking the maximum value for counter from the same userns. So for init_user_ns it will always be RLIM_INFINITY. To fix the problem we need to check the counter value with the max value stored in userns. Reproducer: su - test -c "ulimit -u 3; sleep 5 & sleep 6 & unshare -U --map-root-user sh -c 'sleep 7 & sleep 8 & date; wait'" Before: [1] 175 [2] 176 Fri Nov 26 13:48:20 UTC 2021 [1]- Done sleep 5 [2]+ Done sleep 6 After: [1] 167 [2] 168 sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: Interrupted system call [1]- Done sleep 5 [2]+ Done sleep 6 Fixes: c54b245d0118 ("Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") Reported-by: Gleb Fotengauer-Malinovskiy Signed-off-by: "Eric W. Biederman" Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/024ec805f6e16896f0b23e094773790d171d2c1c.1638218242.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- kernel/ucount.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 4f5613dac2273..7b32c356ebc5c 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -264,15 +264,16 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) { struct ucounts *iter; + long max = LONG_MAX; long ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(v, &iter->ucount[type]); if (new < 0 || new > max) ret = LONG_MAX; else if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); } return ret; } @@ -312,15 +313,16 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; + long max = LONG_MAX; long dec, ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(1, &iter->ucount[type]); if (new < 0 || new > max) goto unwind; if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. @@ -339,15 +341,16 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) return 0; } -bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) +bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit) { struct ucounts *iter; - if (get_ucounts_value(ucounts, type) > max) - return true; + long max = rlimit; + if (rlimit > LONG_MAX) + max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - max = READ_ONCE(iter->ns->ucount_max[type]); if (get_ucounts_value(iter, type) > max) return true; + max = READ_ONCE(iter->ns->ucount_max[type]); } return false; } From 266423e60ea1b953fcc0cd97f3dad85857e434d1 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 6 Dec 2021 09:22:36 +0000 Subject: [PATCH 063/361] pinctrl: bcm2835: Change init order for gpio hogs ...and gpio-ranges pinctrl-bcm2835 is a combined pinctrl/gpio driver. Currently the gpio side is registered first, but this breaks gpio hogs (which are configured during gpiochip_add_data). Part of the hog initialisation is a call to pinctrl_gpio_request, and since the pinctrl driver hasn't yet been registered this results in an -EPROBE_DEFER from which it can never recover. Change the initialisation sequence to register the pinctrl driver first. This also solves a similar problem with the gpio-ranges property, which is required in order for released pins to be returned to inputs. Fixes: 73345a18d464b ("pinctrl: bcm2835: Pass irqchip when adding gpiochip") Signed-off-by: Phil Elwell Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20211206092237.4105895-2-phil@raspberrypi.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 29 +++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 2abcc6ce4eba3..b607d10e4cbd8 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1244,6 +1244,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) raw_spin_lock_init(&pc->irq_lock[i]); } + pc->pctl_desc = *pdata->pctl_desc; + pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); + if (IS_ERR(pc->pctl_dev)) { + gpiochip_remove(&pc->gpio_chip); + return PTR_ERR(pc->pctl_dev); + } + + pc->gpio_range = *pdata->gpio_range; + pc->gpio_range.base = pc->gpio_chip.base; + pc->gpio_range.gc = &pc->gpio_chip; + pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); + girq = &pc->gpio_chip.irq; girq->chip = &bcm2835_gpio_irq_chip; girq->parent_handler = bcm2835_gpio_irq_handler; @@ -1251,8 +1263,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*girq->parents), GFP_KERNEL); - if (!girq->parents) + if (!girq->parents) { + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return -ENOMEM; + } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, @@ -1307,21 +1321,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return err; } - pc->pctl_desc = *pdata->pctl_desc; - pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); - if (IS_ERR(pc->pctl_dev)) { - gpiochip_remove(&pc->gpio_chip); - return PTR_ERR(pc->pctl_dev); - } - - pc->gpio_range = *pdata->gpio_range; - pc->gpio_range.base = pc->gpio_chip.base; - pc->gpio_range.gc = &pc->gpio_chip; - pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); - return 0; } From 92816e2629808726af015c7f5b14adc8e4f8b147 Mon Sep 17 00:00:00 2001 From: Jie2x Zhou Date: Thu, 9 Dec 2021 10:02:30 +0800 Subject: [PATCH 064/361] selftests: net: Correct ping6 expected rc from 2 to 1 ./fcnal-test.sh -v -t ipv6_ping TEST: ping out, VRF bind - ns-B IPv6 LLA [FAIL] TEST: ping out, VRF bind - multicast IP [FAIL] ping6 is failing as it should. COMMAND: ip netns exec ns-A /bin/ping6 -c1 -w1 fe80::7c4c:bcff:fe66:a63a%red strace of ping6 shows it is failing with '1', so change the expected rc from 2 to 1. Fixes: c0644e71df33 ("selftests: Add ipv6 ping tests to fcnal-test") Reported-by: kernel test robot Suggested-by: David Ahern Signed-off-by: Jie2x Zhou Link: https://lore.kernel.org/r/20211209020230.37270-1-jie2x.zhou@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index a1da013d847b9..29cc72d7c3d0a 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -2191,7 +2191,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} From 3fd6e12a401ead0345e4b7e6a73e117f0713e0c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 9 Dec 2021 21:18:13 -0800 Subject: [PATCH 065/361] Input: goodix - fix memory leak in goodix_firmware_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses-Coverity-ID: 1493934 ("Resource leak") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211208173321.26659-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_fwupload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix_fwupload.c b/drivers/input/touchscreen/goodix_fwupload.c index c1e7a24130782..191d4f38d991e 100644 --- a/drivers/input/touchscreen/goodix_fwupload.c +++ b/drivers/input/touchscreen/goodix_fwupload.c @@ -207,7 +207,7 @@ static int goodix_firmware_upload(struct goodix_ts_data *ts) error = goodix_reset_no_int_sync(ts); if (error) - return error; + goto release; error = goodix_enter_upload_mode(ts->client); if (error) From edce10ee21f3916f5da34e55bbc03103c604ba70 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:40 +0100 Subject: [PATCH 066/361] s390/kexec_file: print some more error messages Be kind and give some more information on what went wrong. Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-2-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 9975ad200d747..a8bfa7c8cbba3 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -7,6 +7,8 @@ * Author(s): Philipp Rudo */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -290,9 +292,16 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab) { + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + relas = (void *)pi->ehdr + relsec->sh_offset; for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { @@ -304,15 +313,27 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, sym = (void *)pi->ehdr + symtab->sh_offset; sym += ELF64_R_SYM(relas[i].r_info); - if (sym->st_shndx == SHN_UNDEF) + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); return -ENOEXEC; + } - if (sym->st_shndx == SHN_COMMON) + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); return -ENOEXEC; + } if (sym->st_shndx >= pi->ehdr->e_shnum && - sym->st_shndx != SHN_ABS) + sym->st_shndx != SHN_ABS) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); return -ENOEXEC; + } loc = pi->purgatory_buf; loc += section->sh_offset; From 41967a37b8eedfee15b81406a9f3015be90d3980 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:41 +0100 Subject: [PATCH 067/361] s390/kexec_file: fix error handling when applying relocations arch_kexec_apply_relocations_add currently ignores all errors returned by arch_kexec_do_relocs. This means that every unknown relocation is silently skipped causing unpredictable behavior while the relocated code runs. Fix this by checking for errors and fail kexec_file_load if an unknown relocation type is encountered. The problem was found after gcc changed its behavior and used R_390_PLT32DBL relocations for brasl instruction and relied on ld to resolve the relocations in the final link in case direct calls are possible. As the purgatory code is only linked partially (option -r) ld didn't resolve the relocations leaving them for arch_kexec_do_relocs. But arch_kexec_do_relocs doesn't know how to handle R_390_PLT32DBL relocations so they were silently skipped. This ultimately caused an endless loop in the purgatory as the brasl instructions kept branching to itself. Fixes: 71406883fd35 ("s390/kexec_file: Add kexec_file_load system call") Reported-by: Tao Liu Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-3-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index a8bfa7c8cbba3..876cdd3c994e0 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -296,6 +296,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + int ret; /* String & section header string table */ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; @@ -347,7 +348,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } From ac8fc6af1ab62b2e5d57ddadc8bd4c9433c49a72 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 8 Dec 2021 16:15:03 +0100 Subject: [PATCH 068/361] s390/ftrace: remove preempt_disable()/preempt_enable() pair It looks like commit ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") missed a spot in kprobe_ftrace_handler() in arch/s390/kernel/ftrace.c. Remove the superfluous preempt_disable/enable_notrace() there too. Fixes: ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") Signed-off-by: Jerome Marchand Link: https://lore.kernel.org/r/20211208151503.1510381-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5510c7d10ddc3..21d62d8b6b9af 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -290,7 +290,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -318,7 +317,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); From abf0e8e4ef25478a4390115e6a953d589d1f9ffd Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 9 Dec 2021 08:38:17 +0100 Subject: [PATCH 069/361] s390/kexec: handle R_390_PLT32DBL rela in arch_kexec_apply_relocations_add() Starting with gcc 11.3, the C compiler will generate PLT-relative function calls even if they are local and do not require it. Later on during linking, the linker will replace all PLT-relative calls to local functions with PC-relative ones. Unfortunately, the purgatory code of kexec/kdump is not being linked as a regular executable or shared library would have been, and therefore, all PLT-relative addresses remain in the generated purgatory object code unresolved. This leads to the situation where the purgatory code is being executed during kdump with all PLT-relative addresses unresolved. And this results in endless loops within the purgatory code. Furthermore, the clang C compiler has always behaved like described above and this commit should fix kdump for kernels built with the latter. Because the purgatory code is no regular executable or shared library, contains only calls to local functions and has no PLT, all R_390_PLT32DBL relocation entries can be resolved just like a R_390_PC32DBL one. * https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x1633.html#AEN1699 Relocation entries of purgatory code generated with gcc 11.3 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x370 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000014 R_390_PLT32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000014 R_390_PLT32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000014 R_390_PLT32DBL 0000000000000000 memcmp + 2 Relocation entries of purgatory code generated with gcc 11.2 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x368 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000013 R_390_PC32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000013 R_390_PC32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000013 R_390_PC32DBL 0000000000000000 memcmp + 2 Signed-off-by: Alexander Egorenkov Reported-by: Tao Liu Suggested-by: Philipp Rudo Reviewed-by: Philipp Rudo Cc: Link: https://lore.kernel.org/r/20211209073817.82196-1-egorenar@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 876cdd3c994e0..8f43575a4dd32 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -348,6 +348,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); + + if (r_type == R_390_PLT32DBL) + r_type = R_390_PC32DBL; + ret = arch_kexec_do_relocs(r_type, loc, val, addr); if (ret) { pr_err("Unknown rela relocation: %d\n", r_type); From 5dcf0c3084eb098bbb702f2f5ee55666047997d4 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 9 Dec 2021 15:21:06 +0100 Subject: [PATCH 070/361] s390: enable switchdev support in defconfig The HiperSockets Converged Interface (HSCI) introduced with commit 4e20e73e631a ("s390/qeth: Switchdev event handler") requires CONFIG_SWITCHDEV=y to be usable. Similarly when using Linux controlled SR-IOV capable PF devices with the mlx5_core driver CONFIG_SWITCHDEV=y as well as CONFIG_MLX5_ESWITCH=y are necessary to actually get link on the created VFs. So let's add these to the defconfig to make both types of devices usable. Note also that these options are already enabled in most current distribution kernels. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 2 ++ arch/s390/configs/defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b626bc6e0eaf9..e45cc27716dee 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -117,6 +117,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -511,6 +512,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 0056cab273723..1c750bfca2d8d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -109,6 +109,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -502,6 +503,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set From ab443c53916730862cec202078d36fd4008bea79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Dec 2021 06:20:46 -0800 Subject: [PATCH 071/361] sch_cake: do not call cake_destroy() from cake_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qdiscs are not supposed to call their own destroy() method from init(), because core stack already does that. syzbot was able to trigger use after free: DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock_common kernel/locking/mutex.c:586 [inline] WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Modules linked in: CPU: 0 PID: 21902 Comm: syz-executor189 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__mutex_lock_common kernel/locking/mutex.c:586 [inline] RIP: 0010:__mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Code: 08 84 d2 0f 85 19 08 00 00 8b 05 97 38 4b 04 85 c0 0f 85 27 f7 ff ff 48 c7 c6 20 00 ac 89 48 c7 c7 a0 fe ab 89 e8 bf 76 ba ff <0f> 0b e9 0d f7 ff ff 48 8b 44 24 40 48 8d b8 c8 08 00 00 48 89 f8 RSP: 0018:ffffc9000627f290 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802315d700 RSI: ffffffff815f1db8 RDI: fffff52000c4fe44 RBP: ffff88818f28e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815ebb5e R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: ffffc9000627f458 R15: 0000000093c30000 FS: 0000555556abc400(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fda689c3303 CR3: 000000001cfbb000 CR4: 0000000000350ef0 Call Trace: tcf_chain0_head_change_cb_del+0x2e/0x3d0 net/sched/cls_api.c:810 tcf_block_put_ext net/sched/cls_api.c:1381 [inline] tcf_block_put_ext net/sched/cls_api.c:1376 [inline] tcf_block_put+0xbc/0x130 net/sched/cls_api.c:1394 cake_destroy+0x3f/0x80 net/sched/sch_cake.c:2695 qdisc_create.constprop.0+0x9da/0x10f0 net/sched/sch_api.c:1293 tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2496 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xdf0 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f1bb06badb9 Code: Unable to access opcode bytes at RIP 0x7f1bb06bad8f. RSP: 002b:00007fff3012a658 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f1bb06badb9 RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000003 R11: 0000000000000246 R12: 00007fff3012a688 R13: 00007fff3012a6a0 R14: 00007fff3012a6e0 R15: 00000000000013c2 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20211210142046.698336-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 3c2300d144681..857aaebd49f43 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2736,7 +2736,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2766,10 +2766,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) From 345e004d023343d38088fdfea39688aa11e06ccf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:46:31 +0100 Subject: [PATCH 072/361] bpf: Fix incorrect state pruning for <8B spill/fill Commit 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") introduced support in the verifier to track <8B spill/fills of scalars. The backtracking logic for the precision bit was however skipping spill/fills of less than 8B. That could cause state pruning to consider two states equivalent when they shouldn't be. As an example, consider the following bytecode snippet: 0: r7 = r1 1: call bpf_get_prandom_u32 2: r6 = 2 3: if r0 == 0 goto pc+1 4: r6 = 3 ... 8: [state pruning point] ... /* u32 spill/fill */ 10: *(u32 *)(r10 - 8) = r6 11: r8 = *(u32 *)(r10 - 8) 12: r0 = 0 13: if r8 == 3 goto pc+1 14: r0 = 1 15: exit The verifier first walks the path with R6=3. Given the support for <8B spill/fills, at instruction 13, it knows the condition is true and skips instruction 14. At that point, the backtracking logic kicks in but stops at the fill instruction since it only propagates the precision bit for 8B spill/fill. When the verifier then walks the path with R6=2, it will consider it safe at instruction 8 because R6 is not marked as needing precision. Instruction 14 is thus never walked and is then incorrectly removed as 'dead code'. It's also possible to lead the verifier to accept e.g. an out-of-bound memory access instead of causing an incorrect dead code elimination. This regression was found via Cilium's bpf-next CI where it was causing a conntrack map update to be silently skipped because the code had been removed by the verifier. This commit fixes it by enabling support for <8B spill/fills in the bactracking logic. In case of a <8B spill/fill, the full 8B stack slot will be marked as needing precision. Then, in __mark_chain_precision, any tracked register spilled in a marked slot will itself be marked as needing precision, regardless of the spill size. This logic makes two assumptions: (1) only 8B-aligned spill/fill are tracked and (2) spilled registers are only tracked if the spill and fill sizes are equal. Commit ef979017b837 ("bpf: selftest: Add verifier tests for <8-byte scalar spill and refill") covers the first assumption and the next commit in this patchset covers the second. Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3001937bbb93..f2f1ed34cfe9b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2379,8 +2379,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2403,8 +2401,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); From 0be2516f865f5a876837184a8385163ff64a5889 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:47:00 +0100 Subject: [PATCH 073/361] selftests/bpf: Tests for state pruning with u32 spill/fill This patch adds tests for the verifier's tracking for spilled, <8B registers. The first two test cases ensure the verifier doesn't incorrectly prune states in case of <8B spill/fills. The last one simply checks that a filled u64 register is marked unknown if the register spilled in the same slack slot was less than 8B. The map value access at the end of the first program is only incorrect for the path R6=32. If the precision bit for register R8 isn't backtracked through the u32 spill/fill, the R6=32 path is pruned at instruction 9 and the program is incorrectly accepted. The second program is a variation of the same with u32 spills and a u64 fill. The additional instructions to introduce the first pruning point may be a bit fragile as they depend on the heuristics for pruning points in the verifier (currently at least 8 instructions and 2 jumps). If the heuristics are changed, the pruning point may move (e.g., to the subsequent jump) or disappear, which would cause the test to always pass. Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/search_pruning.c | 71 +++++++++++++++++++ .../selftests/bpf/verifier/spill_fill.c | 32 +++++++++ 2 files changed, 103 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 7e50cb80873a5..682519769fe3c 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -132,6 +132,77 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "precision tracking for u32 spill/fill", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV32_IMM(BPF_REG_6, 32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_6, 4), + /* Additional insns to introduce a pruning point. */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + /* u32 spill/fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_10, -8), + /* out-of-bound map value access for r6=32 */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 15 }, + .result = REJECT, + .errstr = "R0 min value is outside of the allowed memory range", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "precision tracking for u32 spills, u64 fill", + .insns = { + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_7, 0xffffffff), + /* Additional insns to introduce a pruning point. */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + /* u32 spills, u64 fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, -8), + /* if r8 != X goto pc+1 r8 known in fallthrough branch */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0xffffffff, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + /* if r8 == X goto pc+1 condition always true on first + * traversal, so starts backtracking to mark r8 as requiring + * precision. r7 marked as needing precision. r6 not marked + * since it's not tracked. + */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0xffffffff, 1), + /* fails if r8 correctly marked unknown after fill. */ + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "div by zero", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, { "allocated_stack", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 7ab3de1087614..6c907144311f8 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -175,6 +175,38 @@ .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "Spill u32 const scalars. Refill as u64. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r6 = 0 */ + BPF_MOV32_IMM(BPF_REG_6, 0), + /* r7 = 20 */ + BPF_MOV32_IMM(BPF_REG_7, 20), + /* *(u32 *)(r10 -4) = r6 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + /* *(u32 *)(r10 -8) = r7 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + /* r4 = *(u64 *)(r10 -8) */ + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, { "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", .insns = { From bcd0f93353326954817a4f9fa55ec57fb38acbb0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 9 Dec 2021 16:28:39 +0800 Subject: [PATCH 074/361] phonet: refcount leak in pep_sock_accep sock_hold(sk) is invoked in pep_sock_accept(), but __sock_put(sk) is not invoked in subsequent failure branches(pep_accept_conn() != 0). Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20211209082839.33985-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/phonet/pep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index a1525916885ae..b4f90afb0638b 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; From 71ddeac8cd1d217744a0e060ff520e147c9328d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 10:50:58 -0800 Subject: [PATCH 075/361] inet_diag: fix kernel-infoleak for UDP sockets KMSAN reported a kernel-infoleak [1], that can exploited by unpriv users. After analysis it turned out UDP was not initializing r->idiag_expires. Other users of inet_sk_diag_fill() might make the same mistake in the future, so fix this in inet_sk_diag_fill(). [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:156 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 instrument_copy_to_user include/linux/instrumented.h:121 [inline] copyout lib/iov_iter.c:156 [inline] _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 copy_to_iter include/linux/uio.h:155 [inline] simple_copy_to_iter+0xf3/0x140 net/core/datagram.c:519 __skb_datagram_iter+0x2cb/0x1280 net/core/datagram.c:425 skb_copy_datagram_iter+0xdc/0x270 net/core/datagram.c:533 skb_copy_datagram_msg include/linux/skbuff.h:3657 [inline] netlink_recvmsg+0x660/0x1c60 net/netlink/af_netlink.c:1974 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x5a9/0x630 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] new_sync_read fs/read_write.c:400 [inline] vfs_read+0x1631/0x1980 fs/read_write.c:481 ksys_read+0x28c/0x520 fs/read_write.c:619 __do_sys_read fs/read_write.c:629 [inline] __se_sys_read fs/read_write.c:627 [inline] __x64_sys_read+0xdb/0x120 fs/read_write.c:627 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] __kmalloc_node_track_caller+0xe0c/0x1510 mm/slub.c:4974 kmalloc_reserve net/core/skbuff.c:354 [inline] __alloc_skb+0x545/0xf90 net/core/skbuff.c:426 alloc_skb include/linux/skbuff.h:1126 [inline] netlink_dump+0x3d5/0x16a0 net/netlink/af_netlink.c:2245 __netlink_dump_start+0xd1c/0xee0 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:254 [inline] inet_diag_handler_cmd+0x2e7/0x400 net/ipv4/inet_diag.c:1343 sock_diag_rcv_msg+0x24a/0x620 netlink_rcv_skb+0x447/0x800 net/netlink/af_netlink.c:2491 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:276 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x1095/0x1360 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x16f3/0x1870 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_write_iter+0x594/0x690 net/socket.c:1057 do_iter_readv_writev+0xa7f/0xc70 do_iter_write+0x52c/0x1500 fs/read_write.c:851 vfs_writev fs/read_write.c:924 [inline] do_writev+0x63f/0xe30 fs/read_write.c:967 __do_sys_writev fs/read_write.c:1040 [inline] __se_sys_writev fs/read_write.c:1037 [inline] __x64_sys_writev+0xe5/0x120 fs/read_write.c:1037 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Bytes 68-71 of 312 are uninitialized Memory access of size 312 starts at ffff88812ab54000 Data copied to user address 0000000020001440 CPU: 1 PID: 6365 Comm: syz-executor801 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 3c4d05c80567 ("inet_diag: Introduce the inet socket dumping routine") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211209185058.53917-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_diag.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c8fa6e7f7d124..581b5b2d72a5b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -261,6 +261,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, sk_user_ns(NETLINK_CB(cb->skb).sk), @@ -314,9 +315,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { From 94f2a444f28a649926c410eb9a38afb13a83ebe0 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 10:57:22 +0100 Subject: [PATCH 076/361] net: usb: qmi_wwan: add Telit 0x1070 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit FN990 composition: 0x1070: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20211210095722.22269-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 86b814e99224c..f510e82194705 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1358,6 +1358,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From ee60e626d536da4c710b3634afe68fe7c6d69b59 Mon Sep 17 00:00:00 2001 From: Filip Pokryvka Date: Fri, 10 Dec 2021 18:50:32 +0100 Subject: [PATCH 077/361] netdevsim: don't overwrite read only ethtool parms Ethtool ring feature has _max_pending attributes read-only. Set only read-write attributes in nsim_set_ringparam. This patch is useful, if netdevsim device is set-up using NetworkManager, because NetworkManager sends 0 as MAX values, as it is pointless to retrieve them in extra call, because they should be read-only. Then, the device is left in incosistent state (value > MAX). Fixes: a7fc6db099b5 ("netdevsim: support ethtool ring and coalesce settings") Signed-off-by: Filip Pokryvka Link: https://lore.kernel.org/r/20211210175032.411872-1-fpokryvk@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 0ab6a40be6114..a6a713b31aad9 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -77,7 +77,10 @@ static int nsim_set_ringparam(struct net_device *dev, { struct netdevsim *ns = netdev_priv(dev); - memcpy(&ns->ethtool.ring, ring, sizeof(ns->ethtool.ring)); + ns->ethtool.ring.rx_pending = ring->rx_pending; + ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; + ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; + ns->ethtool.ring.tx_pending = ring->tx_pending; return 0; } From 3748939bce3fc7a15ef07161826507fbe410bb7a Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 10 Dec 2021 15:25:23 +0800 Subject: [PATCH 078/361] selftests: icmp_redirect: pass xfail=0 to log_test() If any sub-test in this icmp_redirect.sh is failing but not expected to fail. The script will complain: ./icmp_redirect.sh: line 72: [: 1: unary operator expected This is because when the sub-test is not expected to fail, we won't pass any value for the xfail local variable in log_test() and thus it's empty. Fix this by passing 0 as the 4th variable to log_test() for non-xfail cases. v2: added fixes tag Fixes: 0a36a75c6818 ("selftests: icmp_redirect: support expected failures") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp_redirect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index ecbf57f264ed9..7b9d6e31b8e7d 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -311,7 +311,7 @@ check_exception() ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi - log_test $? 0 "IPv4: ${desc}" + log_test $? 0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then From 27cbf64a766e86f068ce6214f04c00ceb4db1af4 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Fri, 10 Dec 2021 21:09:33 +0800 Subject: [PATCH 079/361] net: hns3: fix use-after-free bug in hclgevf_send_mbx_msg Currently, the hns3_remove function firstly uninstall client instance, and then uninstall acceletion engine device. The netdevice is freed in client instance uninstall process, but acceletion engine device uninstall process still use it to trace runtime information. This causes a use after free problem. So fixes it by check the instance register state to avoid use after free. Fixes: d8355240cf8f ("net: hns3: add trace event support for PF/VF mailbox") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index fdc66fae09601..c5ac6ecf36e10 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -114,7 +114,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg)); - trace_hclge_vf_mbx_send(hdev, req); + if (test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state)) + trace_hclge_vf_mbx_send(hdev, req); /* synchronous send */ if (need_resp) { From 6dde452bceca3f2ed2b33bc46a16ff5682a03a2e Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 10 Dec 2021 21:09:34 +0800 Subject: [PATCH 080/361] net: hns3: fix race condition in debugfs When multiple threads concurrently access the debugfs content, data and pointer exceptions may occur. Therefore, mutex lock protection is added for debugfs. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 ++ .../ethernet/hisilicon/hns3/hns3_debugfs.c | 20 +++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3f7a9a4c59d56..63f5abcc6bf41 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -839,6 +839,8 @@ struct hnae3_handle { u8 netdev_flags; struct dentry *hnae3_dbgfs; + /* protects concurrent contention between debugfs commands */ + struct mutex dbgfs_lock; /* Network interface message level enabled bits */ u32 msg_enable; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 081295bff7654..c381f8af67f08 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1226,6 +1226,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, if (ret) return ret; + mutex_lock(&handle->dbgfs_lock); save_buf = &hns3_dbg_cmd[index].buf; if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || @@ -1238,15 +1239,15 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, read_buf = *save_buf; } else { read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL); - if (!read_buf) - return -ENOMEM; + if (!read_buf) { + ret = -ENOMEM; + goto out; + } /* save the buffer addr until the last read operation */ *save_buf = read_buf; - } - /* get data ready for the first time to read */ - if (!*ppos) { + /* get data ready for the first time to read */ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd, read_buf, hns3_dbg_cmd[index].buf_len); if (ret) @@ -1255,8 +1256,10 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, size = simple_read_from_buffer(buffer, count, ppos, read_buf, strlen(read_buf)); - if (size > 0) + if (size > 0) { + mutex_unlock(&handle->dbgfs_lock); return size; + } out: /* free the buffer for the last read operation */ @@ -1265,6 +1268,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, *save_buf = NULL; } + mutex_unlock(&handle->dbgfs_lock); return ret; } @@ -1337,6 +1341,8 @@ int hns3_dbg_init(struct hnae3_handle *handle) debugfs_create_dir(hns3_dbg_dentry[i].name, handle->hnae3_dbgfs); + mutex_init(&handle->dbgfs_lock); + for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) { if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES && ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) || @@ -1363,6 +1369,7 @@ int hns3_dbg_init(struct hnae3_handle *handle) return 0; out: + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; return ret; @@ -1378,6 +1385,7 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) hns3_dbg_cmd[i].buf = NULL; } + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; } From 7e0147592b5c4f9e2eb8c54a7857a56d4863f74e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:11:30 -0700 Subject: [PATCH 081/361] selftests: Add duplicate config only for MD5 VRF tests Commit referenced below added configuration in the default VRF that duplicates a VRF to check MD5 passwords are properly used and fail when expected. That config should not be added all the time as it can cause tests to pass that should not (by matching on default VRF setup when it should not). Move the duplicate setup to a function that is only called for the MD5 tests and add a cleanup function to remove it after the MD5 tests. Fixes: 5cad8bce26e0 ("fcnal-test: Add TCP MD5 tests for VRF") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 29cc72d7c3d0a..dd7437dd2680b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -455,6 +455,22 @@ cleanup() ip netns del ${NSC} >/dev/null 2>&1 } +cleanup_vrf_dup() +{ + ip link del ${NSA_DEV2} >/dev/null 2>&1 + ip netns pids ${NSC} | xargs kill 2>/dev/null + ip netns del ${NSC} >/dev/null 2>&1 +} + +setup_vrf_dup() +{ + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 +} + setup() { local with_vrf=${1} @@ -484,12 +500,6 @@ setup() ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} - - # some VRF tests use ns-C which has the same config as - # ns-B but for a device NOT in the VRF - create_ns ${NSC} "-" "-" - connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ - ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 else ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} @@ -1240,7 +1250,9 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv4_tcp_md5 + cleanup_vrf_dup # # enable VRF global server @@ -2719,7 +2731,9 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv6_tcp_md5 + cleanup_vrf_dup # # enable VRF global server From 0f108ae4452025fef529671998f6c7f1c4526790 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:21:08 -0700 Subject: [PATCH 082/361] selftests: Fix raw socket bind tests with VRF Commit referenced below added negative socket bind tests for VRF. The socket binds should fail since the address to bind to is in a VRF yet the socket is not bound to the VRF or a device within it. Update the expected return code to check for 1 (bind failure) so the test passes when the bind fails as expected. Add a 'show_hint' comment to explain why the bind is expected to fail. Fixes: 75b2b2b3db4c ("selftests: Add ipv4 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index dd7437dd2680b..4340477863d36 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -1810,8 +1810,9 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b From 28a2686c185e84b6aa6a4d9c9a972360eb7ca266 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 11:26:16 -0700 Subject: [PATCH 083/361] selftests: Fix IPv6 address bind tests IPv6 allows binding a socket to a device then binding to an address not on the device (__inet6_bind -> ipv6_chk_addr with strict flag not set). Update the bind tests to reflect legacy behavior. Fixes: 34d0302ab861 ("selftests: Add ipv6 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4340477863d36..ad2982b72e02b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3429,11 +3429,14 @@ ipv6_addr_bind_novrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -3474,10 +3477,15 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start From c9b12b59e2ea4c3c7cedec7efb071b649652f3a9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 6 Dec 2021 11:50:16 +0100 Subject: [PATCH 084/361] s390/entry: fix duplicate tracking of irq nesting level In the current code, when exiting from idle, rcu_irq_enter() is called twice during irq entry: irq_entry_enter()-> rcu_irq_enter() irq_enter() -> rcu_irq_enter() This may lead to wrong results from rcu_is_cpu_rrupt_from_idle() because of a wrong dynticks nmi nesting count. Fix this by only calling irq_enter_rcu(). Cc: # 5.12+ Reported-by: Mark Rutland Fixes: 56e62a737028 ("s390: convert to generic entry") Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0df83ecaa2e0c..cb70996823401 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -138,7 +138,7 @@ void noinstr do_io_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -158,7 +158,8 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, IO_INTERRUPT); } while (MACHINE_IS_LPAR && irq_pending(regs)); - irq_exit(); + irq_exit_rcu(); + set_irq_regs(old_regs); irqentry_exit(regs, state); @@ -172,7 +173,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -190,7 +191,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) do_irq_async(regs, EXT_INTERRUPT); - irq_exit(); + irq_exit_rcu(); set_irq_regs(old_regs); irqentry_exit(regs, state); From 85bf17b28f97ca2749968d8786dc423db320d9c2 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 10 Dec 2021 10:38:27 +0100 Subject: [PATCH 085/361] recordmcount.pl: look for jgnop instruction as well as bcrl on s390 On s390, recordmcount.pl is looking for "bcrl 0," instructions in the objdump -d outpout. However since binutils 2.37, objdump -d display "jgnop " for the same instruction. Update the mcount_regex so that it accepts both. Signed-off-by: Jerome Marchand Reviewed-by: Miroslav Benes Acked-by: Steven Rostedt (VMware) Cc: Link: https://lore.kernel.org/r/20211210093827.1623286-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- scripts/recordmcount.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 7d631aaa0ae11..52a000b057a57 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -219,7 +219,7 @@ } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } $alignment = 8; From fce15c45d3fbd9fc1feaaf3210d8e3f8b33dfd3a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Nov 2021 10:02:44 -0700 Subject: [PATCH 086/361] hwmon: (lm90) Fix usage of CONFIG2 register in detect function The detect function had a comment "Make compiler happy" when id did not read the second configuration register. As it turns out, the code was checking the contents of this register for manufacturer ID 0xA1 (NXP Semiconductor/Philips), but never actually read the register. So it wasn't surprising that the compiler complained, and it indeed had a point. Fix the code to read the register contents for manufacturer ID 0xa1. At the same time, the code was reading the register for manufacturer ID 0x41 (Analog Devices), but it was not using the results. In effect it was just checking if reading the register returned an error. That doesn't really add much if any value, so stop doing that. Fixes: f90be42fb383 ("hwmon: (lm90) Refactor reading of config2 register") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 618052c6cdb64..b05d73c4fbe2a 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1465,12 +1465,11 @@ static int lm90_detect(struct i2c_client *client, if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0) return -ENODEV; - if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) { + if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) { config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2); if (config2 < 0) return -ENODEV; - } else - config2 = 0; /* Make compiler happy */ + } if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ From 55840b9eae5367b5d5b29619dc2fb7e4596dba46 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 17 Nov 2021 09:51:47 -0800 Subject: [PATCH 087/361] hwmon: (lm90) Prevent integer overflow/underflow in hysteresis calculations Commit b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") addressed a number of underflow situations when writing temperature limits. However, it missed one situation, seen when an attempt is made to set the hysteresis value to MAX_LONG and the critical temperature limit is negative. Use clamp_val() when setting the hysteresis temperature to ensure that the provided value can never overflow or underflow. Fixes: b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") Cc: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index b05d73c4fbe2a..72969ea83d82e 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1160,8 +1160,8 @@ static int lm90_set_temphyst(struct lm90_data *data, long val) else temp = temp_from_s8(data->temp8[LOCAL_CRIT]); - /* prevent integer underflow */ - val = max(val, -128000l); + /* prevent integer overflow/underflow */ + val = clamp_val(val, -128000l, 255000l); data->temp_hyst = hyst_to_reg(temp - val); err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST, From 16ba51b5dcd3f6dde2e51d5ccc86313119dcf889 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 13 Nov 2021 08:55:06 -0800 Subject: [PATCH 088/361] hwmon: (lm90) Drop critical attribute support for MAX6654 Tests with a real chip and a closer look into the datasheet show that MAX6654 does not support CRIT/THERM/OVERTEMP limits, so drop support of the respective attributes for this chip. Introduce LM90_HAVE_CRIT flag and use it to instantiate critical limit attributes to solve the problem. Cc: Josh Lehan Fixes: 229d495d8189 ("hwmon: (lm90) Add max6654 support to lm90 driver") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 86 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 72969ea83d82e..6597d055e09d8 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -35,13 +35,14 @@ * explicitly as max6659, or if its address is not 0x4c. * These chips lack the remote temperature offset feature. * - * This driver also supports the MAX6654 chip made by Maxim. This chip can - * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is - * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available - * by setting the configuration register accordingly, and is done during - * initialization. Extended precision is only available at conversion rates - * of 1 Hz and slower. Note that extended precision is not enabled by - * default, as this driver initializes all chips to 2 Hz by design. + * This driver also supports the MAX6654 chip made by Maxim. This chip can be + * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar + * to MAX6657/MAX6658/MAX6659, but does not support critical temperature + * limits. Extended range is available by setting the configuration register + * accordingly, and is done during initialization. Extended precision is only + * available at conversion rates of 1 Hz and slower. Note that extended + * precision is not enabled by default, as this driver initializes all chips + * to 2 Hz by design. * * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and * MAX6692 chips made by Maxim. These are again similar to the LM86, @@ -188,6 +189,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */ #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ +#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -354,38 +356,43 @@ struct lm90_params { static const struct lm90_params lm90_params[] = { [adm1032] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [adt7461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP + | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [g781] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [lm86] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm90] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm99] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [max6646] = { + .flags = LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, @@ -396,50 +403,50 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6657] = { - .flags = LM90_PAUSE_FOR_CONFIG, + .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6659] = { - .flags = LM90_HAVE_EMERGENCY, + .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 7, }, [max6696] = { .flags = LM90_HAVE_EMERGENCY - | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3, + | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT, .alert_alarms = 0x1c7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [w83l771] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [sa56004] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL, }, [tmp451] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, }, [tmp461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, @@ -668,20 +675,22 @@ static int lm90_update_limits(struct device *dev) struct i2c_client *client = data->client; int val; - val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); - if (val < 0) - return val; - data->temp8[LOCAL_CRIT] = val; + if (data->flags & LM90_HAVE_CRIT) { + val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); + if (val < 0) + return val; + data->temp8[LOCAL_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); - if (val < 0) - return val; - data->temp8[REMOTE_CRIT] = val; + val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); + if (val < 0) + return val; + data->temp8[REMOTE_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); - if (val < 0) - return val; - data->temp_hyst = val; + val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); + if (val < 0) + return val; + data->temp_hyst = val; + } val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH); if (val < 0) @@ -1902,11 +1911,14 @@ static int lm90_probe(struct i2c_client *client) info->config = data->channel_config; data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM; data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT; + + if (data->flags & LM90_HAVE_CRIT) { + data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + } if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; From da7dc0568491104c7acb632e9d41ddce9aaabbb1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Nov 2021 22:43:39 -0800 Subject: [PATCH 089/361] hwmom: (lm90) Fix citical alarm status for MAX6680/MAX6681 Tests with a real chip and a closer look into the datasheet reveals that the local and remote critical alarm status bits are swapped for MAX6680/MAX6681. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 6597d055e09d8..dd8612a9d5362 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -190,6 +190,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ #define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ +#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -415,7 +416,8 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT + | LM90_HAVE_CRIT_ALRM_SWP, .alert_alarms = 0x7c, .max_convrate = 7, }, @@ -1201,6 +1203,7 @@ static const u8 lm90_temp_emerg_index[3] = { static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; +static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; @@ -1226,7 +1229,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val) *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1; break; case hwmon_temp_crit_alarm: - *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; + if (data->flags & LM90_HAVE_CRIT_ALRM_SWP) + *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1; + else + *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; break; case hwmon_temp_emergency_alarm: *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1; From cdc5287acad9ede121924a9c9313544b80d15842 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 3 Dec 2021 13:42:22 -0800 Subject: [PATCH 090/361] hwmon: (lm90) Do not report 'busy' status bit as alarm Bit 7 of the status register indicates that the chip is busy doing a conversion. It does not indicate an alarm status. Stop reporting it as alarm status bit. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index dd8612a9d5362..74019dff2550e 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -200,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */ +#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */ @@ -820,7 +821,7 @@ static int lm90_update_device(struct device *dev) val = lm90_read_reg(client, LM90_REG_R_STATUS); if (val < 0) return val; - data->alarms = val; /* lower 8 bit of alarms */ + data->alarms = val & ~LM90_STATUS_BUSY; if (data->kind == max6696) { val = lm90_select_remote_channel(data, 1); From 12f247ab590a08856441efdbd351cf2cc8f60a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 12 Dec 2021 21:01:49 -0800 Subject: [PATCH 091/361] Input: atmel_mxt_ts - fix double free in mxt_read_info_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "id_buf" buffer is stored in "data->raw_info_block" and freed by "mxt_free_object_table" in case of error. Return instead of jumping to avoid a double free. Addresses-Coverity-ID: 1474582 ("Double free") Fixes: 068bdb67ef74 ("Input: atmel_mxt_ts - fix the firmware update") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211212194257.68879-1-jose.exposito89@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 05de92c0293bc..eb66cd2689b7c 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1882,7 +1882,7 @@ static int mxt_read_info_block(struct mxt_data *data) if (error) { dev_err(&client->dev, "Error %d parsing object table\n", error); mxt_free_object_table(data); - goto err_free_mem; + return error; } data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); From 80936d68665be88dc3bf60884a71f2694eb6b1f1 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 9 Dec 2021 23:39:56 +0530 Subject: [PATCH 092/361] dmaengine: ti: k3-udma: Fix smatch warnings Smatch reports below warnings [1] wrt dereferencing rm_res when it can potentially be ERR_PTR(). This is possible when entire range is allocated to Linux Fix this case by making sure, there is no deference of rm_res when its ERR_PTR(). [1]: drivers/dma/ti/k3-udma.c:4524 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4537 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4681 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4696 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4711 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4848 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4861 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() Reported-by: Nishanth Menon Signed-off-by: Vignesh Raghavendra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211209180957.29036-1-vigneshr@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 157 ++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 50 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 041d8e32d6300..6e56d1cef5eee 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4534,45 +4534,60 @@ static int udma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* rchan and matching default flow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start; - irq_res.desc[i].num = rm_res->desc[i].num; - irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; - irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = 0; + irq_res.desc[0].num = ud->tchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start; + irq_res.desc[i].num = rm_res->desc[i].num; + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i++) { - if (rm_res->desc[j].num) { - irq_res.desc[i].start = rm_res->desc[j].start + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num = rm_res->desc[j].num; - } - if (rm_res->desc[j].num_sec) { - irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = 0; + irq_res.desc[i].num = ud->rchan_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + if (rm_res->desc[j].num) { + irq_res.desc[i].start = rm_res->desc[j].start + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num = rm_res->desc[j].num; + } + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + } } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); @@ -4690,14 +4705,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->bchan_map, ud->bchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->bchan_map, ud->bchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->bchan_map, &rm_res->desc[i], "bchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; } /* tchan ranges */ @@ -4705,14 +4721,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } /* rchan ranges */ @@ -4720,47 +4737,72 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; if (ud->bchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->bcdma_bchan_ring; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->bcdma_bchan_ring; + irq_res.desc[0].num = ud->bchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->bcdma_bchan_ring; + irq_res.desc[i].num = rm_res->desc[i].num; + } } } if (ud->tchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_tchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; - - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_tchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_tchan_data; + irq_res.desc[i].num = ud->tchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = ud->tchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_tchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; + + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } if (ud->rchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_rchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; - - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_rchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_rchan_data; + irq_res.desc[i].num = ud->rchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = ud->rchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_rchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; + + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } @@ -4858,39 +4900,54 @@ static int pktdma_setup_resources(struct udma_dev *ud) if (IS_ERR(rm_res)) { /* all rflows are assigned exclusively to Linux */ bitmap_zero(ud->rflow_in_use, ud->rflow_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->rflow_in_use, ud->rflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rflow_in_use, &rm_res->desc[i], "rflow"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* tflow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; if (IS_ERR(rm_res)) { /* all tflows are assigned exclusively to Linux */ bitmap_zero(ud->tflow_map, ud->tflow_cnt); + irq_res.sets++; } else { bitmap_fill(ud->tflow_map, ud->tflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tflow_map, &rm_res->desc[i], "tflow"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->pktdma_tchan_flow; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->pktdma_tchan_flow; + irq_res.desc[0].num = ud->tflow_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->pktdma_tchan_flow; + irq_res.desc[i].num = rm_res->desc[i].num; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; - for (j = 0; j < rm_res->sets; j++, i++) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->pktdma_rchan_flow; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->pktdma_rchan_flow; + irq_res.desc[i].num = ud->rflow_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->pktdma_rchan_flow; + irq_res.desc[i].num = rm_res->desc[j].num; + } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); kfree(irq_res.desc); From 8affd8a4b5ce356c8900cfb037674f3a4a11fbdb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 8 Dec 2021 10:01:27 -0700 Subject: [PATCH 093/361] dmaengine: idxd: fix missed completion on abort path Ming reported that with the abort path of the descriptor submission, there can be a window where a completed descriptor can be missed to be completed by the irq completion thread: CPU A CPU B Submit (successful) Submit (fail) irq_process_work_list() // empty llist_abort_desc() // remove all descs from pending list irq_process_pending_llist() // empty exit idxd_wq_thread() with no processing Add opportunistic descriptor completion in the abort path in order to remove the missed completion. Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Reported-by: Ming Li Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163898288714.443911.16084982766671976640.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac24..83452fbbb168b 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -106,6 +106,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; + LIST_HEAD(flist); desc->completion->status = IDXD_COMP_DESC_ABORT; /* @@ -120,7 +121,11 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, found = desc; continue; } - list_add_tail(&desc->list, &ie->work_list); + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); } } @@ -130,6 +135,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, if (found) complete_desc(found, IDXD_COMPLETE_ABORT); + + /* + * complete_desc() will return desc to allocator and the desc can be + * acquired by a different process and the desc->list can be modified. + * Delete desc from list so the list trasversing does not get corrupted + * by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + complete_desc(d, IDXD_COMPLETE_NORMAL); + } } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) From 822c9f2b833c53fc67e8adf6f63ecc3ea24d502c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Thu, 25 Nov 2021 15:44:38 +0000 Subject: [PATCH 094/361] dmaengine: st_fdma: fix MODULE_ALIAS modprobe can't handle spaces in aliases. Fixes: 6b4cd727eaf1 ("dmaengine: st_fdma: Add STMicroelectronics FDMA engine driver support") Signed-off-by: Alyssa Ross Link: https://lore.kernel.org/r/20211125154441.2626214-1-hi@alyssa.is Signed-off-by: Vinod Koul --- drivers/dma/st_fdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c index 962b6e05287b5..d95c421877fb7 100644 --- a/drivers/dma/st_fdma.c +++ b/drivers/dma/st_fdma.c @@ -874,4 +874,4 @@ MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); MODULE_AUTHOR("Ludovic.barre "); MODULE_AUTHOR("Peter Griffin "); -MODULE_ALIAS("platform: " DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); From 2dee54b289fbc810669a1b2b8a0887fa1c9a14d7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 12 Dec 2021 17:20:25 +0000 Subject: [PATCH 095/361] ALSA: drivers: opl3: Fix incorrect use of vp->state Static analysis with scan-build has found an assignment to vp2 that is never used. It seems that the check on vp->state > 0 should be actually on vp2->state instead. Fix this. This dates back to 2002, I found the offending commit from the git history git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git, commit 91e39521bbf6 ("[PATCH] ALSA patch for 2.5.4") Signed-off-by: Colin Ian King Cc: Link: https://lore.kernel.org/r/20211212172025.470367-1-colin.i.king@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index e1b69c65c3c88..e2b7be67f0e30 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -397,7 +397,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan) } if (instr_4op) { vp2 = &opl3->voices[voice + 3]; - if (vp->state > 0) { + if (vp2->state > 0) { opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; From c01c1db1dc632edafb0dff32d40daf4f9c1a4e19 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Mon, 13 Dec 2021 15:39:31 +0800 Subject: [PATCH 096/361] ALSA: jack: Check the return value of kstrdup() kstrdup() can return NULL, it is better to check the return value of it. Signed-off-by: Xiaoke Wang Cc: Link: https://lore.kernel.org/r/tencent_094816F3522E0DC704056C789352EBBF0606@qq.com Signed-off-by: Takashi Iwai --- sound/core/jack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/jack.c b/sound/core/jack.c index 32350c6aba849..537df1e98f8ac 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -509,6 +509,10 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; jack->id = kstrdup(id, GFP_KERNEL); + if (jack->id == NULL) { + kfree(jack); + return -ENOMEM; + } /* don't creat input device for phantom jack */ if (!phantom_jack) { From 5cf06065bd1f7b94fbb80e7eeb033899f77ab5ba Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion-Rodriguez Date: Sun, 12 Dec 2021 16:06:02 +0000 Subject: [PATCH 097/361] drm: simpledrm: fix wrong unit with pixel clock Pixel clock has to be set in kHz. Signed-off-by: Alejandro Concepcion-Rodriguez Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/6f8554ef-1305-0dda-821c-f7d2e5644a48@acoro.eu --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 481b48bde0473..5a6e89825bc2f 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -458,7 +458,7 @@ static struct drm_display_mode simpledrm_mode(unsigned int width, { struct drm_display_mode mode = { SIMPLEDRM_MODE(width, height) }; - mode.clock = 60 /* Hz */ * mode.hdisplay * mode.vdisplay; + mode.clock = mode.hdisplay * mode.vdisplay * 60 / 1000 /* kHz */; drm_mode_set_name(&mode); return mode; From c062f2a0b04d86c5b8c9d973bea43493eaca3d32 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 10 Dec 2021 17:42:47 +0100 Subject: [PATCH 098/361] net/sched: sch_ets: don't remove idle classes from the round-robin list Shuang reported that the following script: 1) tc qdisc add dev ddd0 handle 10: parent 1: ets bands 8 strict 4 priomap 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 2) mausezahn ddd0 -A 10.10.10.1 -B 10.10.10.2 -c 0 -a own -b 00:c1:a0:c1:a0:00 -t udp & 3) tc qdisc change dev ddd0 handle 10: ets bands 4 strict 2 quanta 2500 2500 priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 crashes systematically when line 2) is commented: list_del corruption, ffff8e028404bd30->next is LIST_POISON1 (dead000000000100) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:47! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 954 Comm: tc Not tainted 5.16.0-rc4+ #478 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Call Trace: ets_qdisc_change+0x58b/0xa70 [sch_ets] tc_modify_qdisc+0x323/0x880 rtnetlink_rcv_msg+0x169/0x4a0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1a5/0x280 netlink_sendmsg+0x257/0x4d0 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x1f2/0x260 ___sys_sendmsg+0x7c/0xc0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x3a/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7efdc8031338 Code: 89 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 43 2c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 41 89 d4 55 RSP: 002b:00007ffdf1ce9828 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000061b37a97 RCX: 00007efdc8031338 RDX: 0000000000000000 RSI: 00007ffdf1ce9890 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000078a940 R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000688880 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: sch_ets sch_tbf dummy rfkill iTCO_wdt iTCO_vendor_support intel_rapl_msr intel_rapl_common joydev pcspkr i2c_i801 virtio_balloon i2c_smbus lpc_ich ip_tables xfs libcrc32c crct10dif_pclmul crc32_pclmul crc32c_intel serio_raw ghash_clmulni_intel ahci libahci libata virtio_blk virtio_console virtio_net net_failover failover sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: sch_ets] ---[ end trace f35878d1912655c2 ]--- RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x4e00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- we can remove 'q->classes[i].alist' only if DRR class 'i' was part of the active list. In the ETS scheduler DRR classes belong to that list only if the queue length is greater than zero: we need to test for non-zero value of 'q->classes[i].qdisc->q.qlen' before removing from the list, similarly to what has been done elsewhere in the ETS code. Fixes: de6d25924c2a ("net/sched: sch_ets: don't peek at classes beyond 'nbands'") Reported-by: Shuang Li Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_ets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index e007fc75ef2fe..d733934935533 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -666,9 +666,9 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } } for (i = q->nbands; i < oldbands; i++) { - qdisc_tree_flush_backlog(q->classes[i].qdisc); - if (i >= q->nstrict) + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del(&q->classes[i].alist); + qdisc_tree_flush_backlog(q->classes[i].qdisc); } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); From 3b8e19a0aa3933a785be9f1541afd8d398c4ec69 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 28 Oct 2021 09:43:11 +0200 Subject: [PATCH 099/361] drm/mediatek: hdmi: Perform NULL pointer check for mtk_hdmi_conf In commit 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") a check for CEA modes was added to function mtk_hdmi_bridge_mode_valid() in order to address possible issues on MT8167; moreover, with commit c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") another similar check was introduced. Unfortunately though, at the time of writing, MT8173 does not provide any mtk_hdmi_conf structure and this is crashing the kernel with NULL pointer upon entering mtk_hdmi_bridge_mode_valid(), which happens as soon as a HDMI cable gets plugged in. To fix this regression, add a NULL pointer check for hdmi->conf in the said function, restoring HDMI functionality and avoiding NULL pointer kernel panics. Fixes: 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") Fixes: c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 5838c44cbf6f0..3196189429bcf 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1224,12 +1224,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_BAD; } - if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) - return MODE_BAD; + if (hdmi->conf) { + if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) + return MODE_BAD; - if (hdmi->conf->max_mode_clock && - mode->clock > hdmi->conf->max_mode_clock) - return MODE_CLOCK_HIGH; + if (hdmi->conf->max_mode_clock && + mode->clock > hdmi->conf->max_mode_clock) + return MODE_CLOCK_HIGH; + } if (mode->clock < 27000) return MODE_CLOCK_LOW; From 865ed67ab955428b9aa771d8b4f1e4fb7fd08945 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 9 Dec 2021 12:04:56 +0000 Subject: [PATCH 100/361] firmware: arm_scpi: Fix string overflow in SCPI genpd driver Without the bound checks for scpi_pd->name, it could result in the buffer overflow when copying the SCPI device name from the corresponding device tree node as the name string is set at maximum size of 30. Let us fix it by using devm_kasprintf so that the string buffer is allocated dynamically. Fixes: 8bec4337ad40 ("firmware: scpi: add device power domain support using genpd") Reported-by: Pedro Batista Signed-off-by: Sudeep Holla Cc: stable@vger.kernel.org Cc: Cristian Marussi Link: https://lore.kernel.org/r/20211209120456.696879-1-sudeep.holla@arm.com' Signed-off-by: Arnd Bergmann --- drivers/firmware/scpi_pm_domain.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c index 51201600d789b..800673910b511 100644 --- a/drivers/firmware/scpi_pm_domain.c +++ b/drivers/firmware/scpi_pm_domain.c @@ -16,7 +16,6 @@ struct scpi_pm_domain { struct generic_pm_domain genpd; struct scpi_ops *ops; u32 domain; - char name[30]; }; /* @@ -110,8 +109,13 @@ static int scpi_pm_domain_probe(struct platform_device *pdev) scpi_pd->domain = i; scpi_pd->ops = scpi_ops; - sprintf(scpi_pd->name, "%pOFn.%d", np, i); - scpi_pd->genpd.name = scpi_pd->name; + scpi_pd->genpd.name = devm_kasprintf(dev, GFP_KERNEL, + "%pOFn.%d", np, i); + if (!scpi_pd->genpd.name) { + dev_err(dev, "Failed to allocate genpd name:%pOFn.%d\n", + np, i); + continue; + } scpi_pd->genpd.power_off = scpi_pd_power_off; scpi_pd->genpd.power_on = scpi_pd_power_on; From 890d5b40908bfd1a79be018d2d297cf9df60f4ee Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sat, 4 Dec 2021 23:49:12 +0200 Subject: [PATCH 101/361] usb: gadget: u_ether: fix race in setting MAC address in setup phase When listening for notifications through netlink of a new interface being registered, sporadically, it is possible for the MAC to be read as zero. The zero MAC address lasts a short period of time and then switches to a valid random MAC address. This causes problems for netd in Android, which assumes that the interface is malfunctioning and will not use it. In the good case we get this log: InterfaceController::getCfg() ifName usb0 hwAddr 92:a8:f0:73:79:5b ipv4Addr 0.0.0.0 flags 0x1002 In the error case we get these logs: InterfaceController::getCfg() ifName usb0 hwAddr 00:00:00:00:00:00 ipv4Addr 0.0.0.0 flags 0x1002 netd : interfaceGetCfg("usb0") netd : interfaceSetCfg() -> ServiceSpecificException (99, "[Cannot assign requested address] : ioctl() failed") The reason for the issue is the order in which the interface is setup, it is first registered through register_netdev() and after the MAC address is set. Fixed by first setting the MAC address of the net_device and after that calling register_netdev(). Fixes: bcd4a1c40bee885e ("usb: gadget: u_ether: construct with default values and add setters/getters") Cc: stable@vger.kernel.org Signed-off-by: Marian Postevca Link: https://lore.kernel.org/r/20211204214912.17627-1-posteuca@mutex.one Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index e0ad5aed6ac98..6f5d45ef2e39a 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "u_ether.h" @@ -863,19 +864,23 @@ int gether_register_netdev(struct net_device *net) { struct eth_dev *dev; struct usb_gadget *g; - struct sockaddr sa; int status; if (!net->dev.parent) return -EINVAL; dev = netdev_priv(net); g = dev->gadget; + + net->addr_assign_type = NET_ADDR_RANDOM; + eth_hw_addr_set(net, dev->dev_mac); + status = register_netdev(net); if (status < 0) { dev_dbg(&g->dev, "register_netdev failed, %d\n", status); return status; } else { INFO(dev, "HOST MAC %pM\n", dev->host_mac); + INFO(dev, "MAC %pM\n", dev->dev_mac); /* two kinds of host-initiated state changes: * - iff DATA transfer is active, carrier is "on" @@ -883,15 +888,6 @@ int gether_register_netdev(struct net_device *net) */ netif_carrier_off(net); } - sa.sa_family = net->type; - memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN); - rtnl_lock(); - status = dev_set_mac_address(net, &sa, NULL); - rtnl_unlock(); - if (status) - pr_warn("cannot set self ethernet address: %d\n", status); - else - INFO(dev, "MAC %pM\n", dev->dev_mac); return status; } From ccc14c6cfd346e85c3ecb970975afd5132763437 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Dec 2021 10:54:22 +0800 Subject: [PATCH 102/361] usb: xhci-mtk: fix list_del warning when enable list debug There is warning of 'list_del corruption' when enable list debug (CONFIG_DEBUG_LIST=y), fix it by using list_del_init() Fixes: 4ce186665e7c ("usb: xhci-mtk: Do not use xhci's virt_dev in drop_endpoint") Cc: stable Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211209025422.17108-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 1edef7527c119..edbfa82c65659 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -781,7 +781,7 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) ret = xhci_check_bandwidth(hcd, udev); if (!ret) - INIT_LIST_HEAD(&mtk->bw_ep_chk_list); + list_del_init(&mtk->bw_ep_chk_list); return ret; } From 16f00d969afe60e233c1a91af7ac840df60d3536 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 10 Dec 2021 12:29:45 +0100 Subject: [PATCH 103/361] usb: cdnsp: Fix incorrect calling of cdnsp_died function Patch restrict calling of cdnsp_died function during removing modules or software disconnect. This function was called because after transition controller to HALT state the driver starts handling the deferred interrupt. In this case such interrupt can be simple ignored. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211210112945.660-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 1b1438457fb04..e1ac6c398bd36 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1523,7 +1523,14 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) spin_lock_irqsave(&pdev->lock, flags); if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) { - cdnsp_died(pdev); + /* + * While removing or stopping driver there may still be deferred + * not handled interrupt which should not be treated as error. + * Driver should simply ignore it. + */ + if (pdev->gadget_driver) + cdnsp_died(pdev); + spin_unlock_irqrestore(&pdev->lock, flags); return IRQ_HANDLED; } From 50931ba27d1665c8b038cd1d16c5869301f32fd6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 13 Dec 2021 06:06:09 +0100 Subject: [PATCH 104/361] usb: cdnsp: Fix issue in cdnsp_log_ep trace event Patch fixes incorrect order of __entry->stream_id and __entry->state parameters in TP_printk macro. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211213050609.22640-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-trace.h b/drivers/usb/cdns3/cdnsp-trace.h index 6a2571c6aa9ed..5983dfb996537 100644 --- a/drivers/usb/cdns3/cdnsp-trace.h +++ b/drivers/usb/cdns3/cdnsp-trace.h @@ -57,9 +57,9 @@ DECLARE_EVENT_CLASS(cdnsp_log_ep, __entry->first_prime_det = pep->stream_info.first_prime_det; __entry->drbls_count = pep->stream_info.drbls_count; ), - TP_printk("%s: SID: %08x ep state: %x stream: enabled: %d num %d " + TP_printk("%s: SID: %08x, ep state: %x, stream: enabled: %d num %d " "tds %d, first prime: %d drbls %d", - __get_str(name), __entry->state, __entry->stream_id, + __get_str(name), __entry->stream_id, __entry->state, __entry->enabled, __entry->num_streams, __entry->td_count, __entry->first_prime_det, __entry->drbls_count) ); From 99ea221f2e2f2743314e348b25c1e2574b467528 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Dec 2021 10:18:38 +0100 Subject: [PATCH 105/361] usb: cdnsp: Fix incorrect status for control request Patch fixes incorrect status for control request. Without this fix all usb_request objects were returned to upper drivers with usb_reqest->status field set to -EINPROGRESS. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reported-by: Ken (Jian) He Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211207091838.39572-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index e1ac6c398bd36..e45c3d6e1536c 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1029,6 +1029,8 @@ static void cdnsp_process_ctrl_td(struct cdnsp_device *pdev, return; } + *status = 0; + cdnsp_finish_td(pdev, td, event, pep, status); } From ab8eb798ddabddb2944401bf31ead9671cb97d95 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 11 Dec 2021 14:01:53 +0000 Subject: [PATCH 106/361] net: bcmgenet: Fix NULL vs IS_ERR() checking The phy_attach() function does not return NULL. It returns error pointers. Signed-off-by: Miaoqian Lin Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 5f259641437a7..c888ddee1fc41 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -589,9 +589,9 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) * Internal or external PHY with MDIO access */ phydev = phy_attach(priv->dev, phy_name, pd->phy_interface); - if (!phydev) { + if (IS_ERR(phydev)) { dev_err(kdev, "failed to register PHY device\n"); - return -ENODEV; + return PTR_ERR(phydev); } } else { /* From a8d13611b4a7b1b20d17bf2b9a89a3efcabde56c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 11 Dec 2021 14:30:31 -0500 Subject: [PATCH 107/361] selftests/net: toeplitz: fix udp option Tiny fix. Option -u ("use udp") does not take an argument. It can cause the next argument to silently be ignored. Fixes: 5ebfb4cc3048 ("selftests/net: toeplitz test") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/toeplitz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 710ac956bdb33..c5489341cfb80 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -498,7 +498,7 @@ static void parse_opts(int argc, char **argv) bool have_toeplitz = false; int index, c; - while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { switch (c) { case '4': cfg_family = AF_INET; From 9d591fc028b6bddb38c6585874f331267cbdadae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sat, 11 Dec 2021 23:51:41 +0100 Subject: [PATCH 108/361] net: dsa: mv88e6xxx: Unforce speed & duplex in mac_link_down() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") removed forcing of speed and duplex from mv88e6xxx_mac_config(), where the link is forced down, and left it only in mv88e6xxx_mac_link_up(), by which time link is unforced. It seems that (at least on 88E6190) when changing cmode to 2500base-x, if the link is not forced down, but the speed or duplex are still forced, the forcing of new settings for speed & duplex doesn't take in mv88e6xxx_mac_link_up(). Fix this by unforcing speed & duplex in mv88e6xxx_mac_link_down(). Fixes: 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") Signed-off-by: Marek Behún Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 14f87f6ac479a..cd8462d1e27c0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -768,6 +768,10 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, if ((!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); + + if (!err && ops->port_set_speed_duplex) + err = ops->port_set_speed_duplex(chip, port, SPEED_UNFORCED, + DUPLEX_UNFORCED); mv88e6xxx_reg_unlock(chip); if (err) From 71da1aec215290e249d09c44c768df859f3a3bba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Dec 2021 16:36:00 +0800 Subject: [PATCH 109/361] selftest/net/forwarding: declare NETIFS p9 p10 The recent GRE selftests defined NUM_NETIFS=10. If the users copy forwarding.config.sample to forwarding.config directly, they will get error "Command line is not complete" when run the GRE tests, because create_netif_veth() failed with no interface name defined. Fix it by extending the NETIFS with p9 and p10. Fixes: 2800f2485417 ("selftests: forwarding: Test multipath hashing on inner IP pkts for GRE tunnel") Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/forwarding.config.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index bf17e485684f0..b0980a2efa317 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 From be565ec71d1d59438bed0c7ed0a252a327e0b0ef Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 13 Dec 2021 01:44:36 -0800 Subject: [PATCH 110/361] net: ethernet: ti: add missing of_node_put before return Fix following coccicheck warning: WARNING: Function "for_each_child_of_node" should have of_node_put() before return. Early exits from for_each_child_of_node should decrement the node reference counter. Signed-off-by: Wang Qing Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 29 ++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c092cb61416a1..ffbbda8f4d416 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1844,13 +1844,14 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (ret < 0) { dev_err(dev, "%pOF error reading port_id %d\n", port_np, ret); - return ret; + goto of_node_put; } if (!port_id || port_id > common->port_num) { dev_err(dev, "%pOF has invalid port_id %u %s\n", port_np, port_id, port_np->name); - return -EINVAL; + ret = -EINVAL; + goto of_node_put; } port = am65_common_get_port(common, port_id); @@ -1866,8 +1867,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) (AM65_CPSW_NU_FRAM_PORT_OFFSET * (port_id - 1)); port->slave.mac_sl = cpsw_sl_get("am65", dev, port->port_base); - if (IS_ERR(port->slave.mac_sl)) - return PTR_ERR(port->slave.mac_sl); + if (IS_ERR(port->slave.mac_sl)) { + ret = PTR_ERR(port->slave.mac_sl); + goto of_node_put; + } port->disabled = !of_device_is_available(port_np); if (port->disabled) { @@ -1880,7 +1883,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) ret = PTR_ERR(port->slave.ifphy); dev_err(dev, "%pOF error retrieving port phy: %d\n", port_np, ret); - return ret; + goto of_node_put; } port->slave.mac_only = @@ -1889,10 +1892,12 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* get phy/link info */ if (of_phy_is_fixed_link(port_np)) { ret = of_phy_register_fixed_link(port_np); - if (ret) - return dev_err_probe(dev, ret, + if (ret) { + ret = dev_err_probe(dev, ret, "failed to register fixed-link phy %pOF\n", port_np); + goto of_node_put; + } port->slave.phy_node = of_node_get(port_np); } else { port->slave.phy_node = @@ -1902,14 +1907,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (!port->slave.phy_node) { dev_err(dev, "slave[%d] no phy found\n", port_id); - return -ENODEV; + ret = -ENODEV; + goto of_node_put; } ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", port_np, ret); - return ret; + goto of_node_put; } ret = of_get_mac_address(port_np, port->slave.mac_addr); @@ -1932,6 +1938,11 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) } return 0; + +of_node_put: + of_node_put(port_np); + of_node_put(node); + return ret; } static void am65_cpsw_pcpu_stats_free(void *data) From d33dae51645c0d837e587000f3131118fcd6bf5e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 13 Dec 2021 11:05:13 +0000 Subject: [PATCH 111/361] net: phy: add a note about refcounting Recently, a patch has been submitted to "fix" the refcounting for a DT node in of_mdiobus_link_mdiodev(). This is not a leaked refcount. The refcount is passed to the new device. Sadly, coccicheck identifies this location as a leaked refcount, which means we're likely to keep getting patches to "fix" this. However, fixing this will cause breakage. Add a comment to state that the lack of of_node_put() here is intentional. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c204067f18902..c198722e4871d 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -460,6 +460,9 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus, if (addr == mdiodev->addr) { device_set_node(dev, of_fwnode_handle(child)); + /* The refcount on "child" is passed to the mdio + * device. Do _not_ use of_node_put(child) here. + */ return; } } From 884d2b845477cd0a18302444dc20fe2d9a01743e Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 13 Dec 2021 19:15:15 +0800 Subject: [PATCH 112/361] net: stmmac: Add GFP_DMA32 for rx buffers if no 64 capability Use page_pool_alloc_pages instead of page_pool_dev_alloc_pages, which can give the gfp parameter, in the case of not supporting 64-bit width, using 32-bit address memory can reduce a copy from swiotlb. Signed-off-by: David Wu Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index da8306f607302..8ded4be08b001 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1461,16 +1461,20 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) return -ENOMEM; buf->page_offset = stmmac_rx_offset(priv); } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) return -ENOMEM; @@ -4482,6 +4486,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; while (dirty-- > 0) { struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; @@ -4494,13 +4502,13 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) p = rx_q->dma_rx + entry; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) break; } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) break; From d800c65c2d4eccebb27ffb7808e842d5b533823c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Dec 2021 09:04:01 -0700 Subject: [PATCH 113/361] io-wq: drop wqe lock before creating new worker We have two io-wq creation paths: - On queue enqueue - When a worker goes to sleep The latter invokes worker creation with the wqe->lock held, but that can run into problems if we end up exiting and need to cancel the queued work. syzbot caught this: ============================================ WARNING: possible recursive locking detected 5.16.0-rc4-syzkaller #0 Not tainted -------------------------------------------- iou-wrk-6468/6471 is trying to acquire lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 but task is already holding lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&wqe->lock); lock(&wqe->lock); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by iou-wrk-6468/6471: #0: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 stack backtrace: CPU: 1 PID: 6471 Comm: iou-wrk-6468 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain+0x5984/0x8240 kernel/locking/lockdep.c:3788 __lock_acquire+0x1382/0x2b00 kernel/locking/lockdep.c:5027 lock_acquire+0x19f/0x4d0 kernel/locking/lockdep.c:5637 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154 io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 io_wq_cancel_tw_create fs/io-wq.c:1220 [inline] io_queue_worker_create+0x3cf/0x4c0 fs/io-wq.c:372 io_wq_worker_sleeping+0xbe/0x140 fs/io-wq.c:701 sched_submit_work kernel/sched/core.c:6295 [inline] schedule+0x67/0x1f0 kernel/sched/core.c:6323 schedule_timeout+0xac/0x300 kernel/time/timer.c:1857 wait_woken+0xca/0x1b0 kernel/sched/wait.c:460 unix_msg_wait_data net/unix/unix_bpf.c:32 [inline] unix_bpf_recvmsg+0x7f9/0xe20 net/unix/unix_bpf.c:77 unix_stream_recvmsg+0x214/0x2c0 net/unix/af_unix.c:2832 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x3a7/0x4d0 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] io_iter_do_read fs/io_uring.c:3501 [inline] io_read fs/io_uring.c:3558 [inline] io_issue_sqe+0x144c/0x9590 fs/io_uring.c:6671 io_wq_submit_work+0x2d8/0x790 fs/io_uring.c:6836 io_worker_handle_work+0x808/0xdd0 fs/io-wq.c:574 io_wqe_worker+0x395/0x870 fs/io-wq.c:630 ret_from_fork+0x1f/0x30 We can safely drop the lock before doing work creation, making the two contexts the same in that regard. Reported-by: syzbot+b18b8be69df33a3918e9@syzkaller.appspotmail.com Fixes: 71a85387546e ("io-wq: check for wq exit after adding new worker task_work") Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 8d2bb818a3bb0..5c4f582d6549a 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -395,7 +395,9 @@ static void io_wqe_dec_running(struct io_worker *worker) if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); + raw_spin_unlock(&wqe->lock); io_queue_worker_create(worker, acct, create_worker_cb); + raw_spin_lock(&wqe->lock); } } From bc2f39a6252ee40d9bfc2743d4437d420aec5f6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Nov 2021 11:13:50 +0300 Subject: [PATCH 114/361] iavf: missing unlocks in iavf_watchdog_task() This code was re-organized and there some unlocks missing now. Fixes: 898ef1cb1cb2 ("iavf: Combine init and watchdog state machines") Signed-off-by: Dan Carpenter Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cfdbf8c08d18b..884a19c515433 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2046,6 +2046,7 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; + mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); @@ -2076,9 +2077,8 @@ static void iavf_watchdog_task(struct work_struct *work) iavf_detect_recover_hung(&adapter->vsi); break; case __IAVF_REMOVE: - mutex_unlock(&adapter->crit_lock); - return; default: + mutex_unlock(&adapter->crit_lock); return; } From fe523d7c9a8332855376ad5eb1aa301091129ba4 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 1 Dec 2021 09:14:34 +0100 Subject: [PATCH 115/361] iavf: do not override the adapter state in the watchdog task (again) The watchdog task incorrectly changes the state to __IAVF_RESETTING, instead of letting the reset task take care of that. This was already resolved by commit 22c8fd71d3a5 ("iavf: do not override the adapter state in the watchdog task") but the problem was reintroduced by the recent code refactoring in commit 45eebd62999d ("iavf: Refactor iavf state machine tracking"). Fixes: 45eebd62999d ("iavf: Refactor iavf state machine tracking") Signed-off-by: Stefan Assmann Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 884a19c515433..4e7c04047f917 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2085,7 +2085,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; From e386dfc56f837da66d00a078e5314bc8382fab83 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Dec 2021 14:00:15 -0800 Subject: [PATCH 116/361] fget: clarify and improve __fget_files() implementation Commit 054aa8d439b9 ("fget: check that the fd still exists after getting a ref to it") fixed a race with getting a reference to a file just as it was being closed. It was a fairly minimal patch, and I didn't think re-checking the file pointer lookup would be a measurable overhead, since it was all right there and cached. But I was wrong, as pointed out by the kernel test robot. The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed quite noticeably. Admittedly it seems to be a very artificial test: doing "poll()" system calls on regular files in a very tight loop in multiple threads. That means that basically all the time is spent just looking up file descriptors without ever doing anything useful with them (not that doing 'poll()' on a regular file is useful to begin with). And as a result it shows the extra "re-check fd" cost as a sore thumb. Happily, the regression is fixable by just writing the code to loook up the fd to be better and clearer. There's still a cost to verify the file pointer, but now it's basically in the noise even for that benchmark that does nothing else - and the code is more understandable and has better comments too. [ Side note: this patch is also a classic case of one that looks very messy with the default greedy Myers diff - it's much more legible with either the patience of histogram diff algorithm ] Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/20211213083154.GA20853@linux.intel.com/ Reported-by: kernel test robot Tested-by: Carel Si Cc: Jann Horn Cc: Miklos Szeredi Signed-off-by: Linus Torvalds --- fs/file.c | 72 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/fs/file.c b/fs/file.c index ad4a8bf3cf109..97d212a9b8144 100644 --- a/fs/file.c +++ b/fs/file.c @@ -841,28 +841,68 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } -static struct file *__fget_files(struct files_struct *files, unsigned int fd, - fmode_t mask, unsigned int refs) +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) { - struct file *file; + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; - rcu_read_lock(); -loop: - file = files_lookup_fd_rcu(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - else if (files_lookup_fd_raw(files, fd) != file) { + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { fput_many(file, refs); - goto loop; + continue; } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; } +} + +static struct file *__fget_files(struct files_struct *files, unsigned int fd, + fmode_t mask, unsigned int refs) +{ + struct file *file; + + rcu_read_lock(); + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; From aa50faff4416c869b52dff68a937c84d29e12f4b Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Wed, 1 Dec 2021 22:34:02 +0100 Subject: [PATCH 117/361] PCI: mt7621: Convert driver into 'bool' The driver is not ready yet to be compiled as a module since it depends on some symbols not exported on MIPS. We have the following current problems: Building mips:allmodconfig ... failed -------------- Error log: ERROR: modpost: missing MODULE_LICENSE() in drivers/pci/controller/pcie-mt7621.o ERROR: modpost: "mips_cm_unlock_other" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cpc_base" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cm_lock_other" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cm_is64" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_gcr_base" [drivers/pci/controller/pcie-mt7621.ko] undefined! Temporarily move from 'tristate' to 'bool' until a better solution is ready. Also RALINK is redundant because SOC_MT7621 already depends on it. Hence, simplify condition. Fixes: 2bdd5238e756 ("PCI: mt7621: Add MediaTek MT7621 PCIe host controller driver"). Signed-off-by: Sergio Paracuellos Reviewed-and-Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/pci/controller/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 93b1411105373..7fc5135ffbbfd 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -332,8 +332,8 @@ config PCIE_APPLE If unsure, say Y if you have an Apple Silicon system. config PCIE_MT7621 - tristate "MediaTek MT7621 PCIe Controller" - depends on (RALINK && SOC_MT7621) || (MIPS && COMPILE_TEST) + bool "MediaTek MT7621 PCIe Controller" + depends on SOC_MT7621 || (MIPS && COMPILE_TEST) select PHY_MT7621_PCI default SOC_MT7621 help From d341b427c3c3fd6a58263ce01e01700d16861c28 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:45 +0300 Subject: [PATCH 118/361] ASoC: tegra: Add DAPM switches for headphones and mic jack UCM of Acer Chromebook (Nyan) uses DAPM switches of headphones and mic jack. These switches were lost by accident during unification of the machine drivers, restore them. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-1-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index b95438c3dbf7e..f3e86bd714b45 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -116,6 +116,8 @@ static const struct snd_kcontrol_new tegra_machine_controls[] = { SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Internal Mic 1"), SOC_DAPM_PIN_SWITCH("Internal Mic 2"), + SOC_DAPM_PIN_SWITCH("Headphones"), + SOC_DAPM_PIN_SWITCH("Mic Jack"), }; int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) From db635ba4fadf3ba676d07537f3b3f58166aa7b0e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:46 +0300 Subject: [PATCH 119/361] ASoC: tegra: Restore headphones jack name on Nyan Big UCM of Acer Chromebook (Nyan) uses a different name for the headphones jack. The name was changed during unification of the machine drivers and UCM fails now to load because of that. Restore the old jack name. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-2-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 9 ++++++++- sound/soc/tegra/tegra_asoc_machine.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index f3e86bd714b45..a73404879aa18 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -124,10 +124,16 @@ int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; struct tegra_machine *machine = snd_soc_card_get_drvdata(card); + const char *jack_name; int err; if (machine->gpiod_hp_det && machine->asoc->add_hp_jack) { - err = snd_soc_card_jack_new(card, "Headphones Jack", + if (machine->asoc->hp_jack_name) + jack_name = machine->asoc->hp_jack_name; + else + jack_name = "Headphones Jack"; + + err = snd_soc_card_jack_new(card, jack_name, SND_JACK_HEADPHONE, &tegra_machine_hp_jack, tegra_machine_hp_jack_pins, @@ -660,6 +666,7 @@ static struct snd_soc_card snd_soc_tegra_max98090 = { static const struct tegra_asoc_data tegra_max98090_data = { .mclk_rate = tegra_machine_mclk_rate_12mhz, .card = &snd_soc_tegra_max98090, + .hp_jack_name = "Headphones", .add_common_dapm_widgets = true, .add_common_controls = true, .add_common_snd_ops = true, diff --git a/sound/soc/tegra/tegra_asoc_machine.h b/sound/soc/tegra/tegra_asoc_machine.h index d6a8d13205516..6f795d7dff7c1 100644 --- a/sound/soc/tegra/tegra_asoc_machine.h +++ b/sound/soc/tegra/tegra_asoc_machine.h @@ -14,6 +14,7 @@ struct snd_soc_pcm_runtime; struct tegra_asoc_data { unsigned int (*mclk_rate)(unsigned int srate); const char *codec_dev_name; + const char *hp_jack_name; struct snd_soc_card *card; unsigned int mclk_id; bool hp_jack_gpio_active_low; From b0cdc5dbcf2ba0d99785da5aabf1b17943805b8a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 11 Dec 2021 17:11:12 +0100 Subject: [PATCH 120/361] mptcp: never allow the PM to close a listener subflow Currently, when deleting an endpoint the netlink PM treverses all the local MPTCP sockets, regardless of their status. If an MPTCP listener socket is bound to the IP matching the delete endpoint, the listener TCP socket will be closed. That is unexpected, the PM should only affect data subflows. Additionally, syzbot was able to trigger a NULL ptr dereference due to the above: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 1 PID: 6550 Comm: syz-executor122 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0xd7d/0x54a0 kernel/locking/lockdep.c:4897 Code: 0f 0e 41 be 01 00 00 00 0f 86 c8 00 00 00 89 05 69 cc 0f 0e e9 bd 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 da 48 c1 ea 03 <80> 3c 02 00 0f 85 f3 2f 00 00 48 81 3b 20 75 17 8f 0f 84 52 f3 ff RSP: 0018:ffffc90001f2f818 EFLAGS: 00010016 RAX: dffffc0000000000 RBX: 0000000000000018 RCX: 0000000000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000000a R12: 0000000000000000 R13: ffff88801b98d700 R14: 0000000000000000 R15: 0000000000000001 FS: 00007f177cd3d700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f177cd1b268 CR3: 000000001dd55000 CR4: 0000000000350ee0 Call Trace: lock_acquire kernel/locking/lockdep.c:5637 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5602 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x39/0x50 kernel/locking/spinlock.c:162 finish_wait+0xc0/0x270 kernel/sched/wait.c:400 inet_csk_wait_for_connect net/ipv4/inet_connection_sock.c:464 [inline] inet_csk_accept+0x7de/0x9d0 net/ipv4/inet_connection_sock.c:497 mptcp_accept+0xe5/0x500 net/mptcp/protocol.c:2865 inet_accept+0xe4/0x7b0 net/ipv4/af_inet.c:739 mptcp_stream_accept+0x2e7/0x10e0 net/mptcp/protocol.c:3345 do_accept+0x382/0x510 net/socket.c:1773 __sys_accept4_file+0x7e/0xe0 net/socket.c:1816 __sys_accept4+0xb0/0x100 net/socket.c:1846 __do_sys_accept net/socket.c:1864 [inline] __se_sys_accept net/socket.c:1861 [inline] __x64_sys_accept+0x71/0xb0 net/socket.c:1861 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f177cd8b8e9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f177cd3d308 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 00007f177ce13408 RCX: 00007f177cd8b8e9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f177ce13400 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f177ce1340c R13: 00007f177cde1004 R14: 6d705f706374706d R15: 0000000000022000 Fix the issue explicitly skipping MPTCP socket in TCP_LISTEN status. Reported-and-tested-by: syzbot+e4d843bb96a9431e6331@syzkaller.appspotmail.com Reviewed-by: Mat Martineau Fixes: 740d798e8767 ("mptcp: remove id 0 address") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/ebc7594cdd420d241fb2172ddb8542ba64717657.1639238695.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7b96be1e9f14a..f523051f5aef3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -700,6 +700,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, msk_owned_by_me(msk); + if (sk->sk_state == TCP_LISTEN) + return; + if (!rm_list->nr) return; From 2fe24343922e0428fb68674a4fae099171141bc7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 13 Dec 2021 18:10:48 +0800 Subject: [PATCH 121/361] scsi: pm8001: Fix phys_to_virt() usage on dma_addr_t The driver supports a "direct" mode of operation, where the SMP req frame is directly copied into the command payload (and vice-versa for the SMP resp). To get at the SMP req frame data in the scatterlist the driver uses phys_to_virt() on the DMA mapped memory dma_addr_t . This is broken, and subsequently crashes as follows when an IOMMU is enabled: Unable to handle kernel paging request at virtual address ffff0000fcebfb00 ... pc : pm80xx_chip_smp_req+0x2d0/0x3d0 lr : pm80xx_chip_smp_req+0xac/0x3d0 pm80xx_chip_smp_req+0x2d0/0x3d0 pm8001_task_exec.constprop.0+0x368/0x520 pm8001_queue_command+0x1c/0x30 smp_execute_task_sg+0xdc/0x204 sas_discover_expander.part.0+0xac/0x6cc sas_discover_root_expander+0x8c/0x150 sas_discover_domain+0x3ac/0x6a0 process_one_work+0x1d0/0x354 worker_thread+0x13c/0x470 kthread+0x17c/0x190 ret_from_fork+0x10/0x20 Code: 371806e1 910006d6 6b16033f 54000249 (38766b05) ---[ end trace b91d59aaee98ea2d ]--- note: kworker/u192:0[7] exited with preempt_count 1 Instead use kmap_atomic(). -- Difference to v1: - use kmap_atomic() in both locations Difference to v2: - add whitespace around arithmetic (Damien) Link: https://lore.kernel.org/r/1639390248-213603-1-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index b9f6d83ff380c..2101fc5761c3c 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3053,7 +3053,6 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) struct smp_completion_resp *psmpPayload; struct task_status_struct *ts; struct pm8001_device *pm8001_dev; - char *pdma_respaddr = NULL; psmpPayload = (struct smp_completion_resp *)(piomb + 4); status = le32_to_cpu(psmpPayload->status); @@ -3080,19 +3079,23 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) if (pm8001_dev) atomic_dec(&pm8001_dev->running_req); if (pm8001_ha->smp_exp_mode == SMP_DIRECT) { + struct scatterlist *sg_resp = &t->smp_task.smp_resp; + u8 *payload; + void *to; + pm8001_dbg(pm8001_ha, IO, "DIRECT RESPONSE Length:%d\n", param); - pdma_respaddr = (char *)(phys_to_virt(cpu_to_le64 - ((u64)sg_dma_address - (&t->smp_task.smp_resp)))); + to = kmap_atomic(sg_page(sg_resp)); + payload = to + sg_resp->offset; for (i = 0; i < param; i++) { - *(pdma_respaddr+i) = psmpPayload->_r_a[i]; + *(payload + i) = psmpPayload->_r_a[i]; pm8001_dbg(pm8001_ha, IO, "SMP Byte%d DMA data 0x%x psmp 0x%x\n", - i, *(pdma_respaddr + i), + i, *(payload + i), psmpPayload->_r_a[i]); } + kunmap_atomic(to); } break; case IO_ABORTED: @@ -4236,14 +4239,14 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, struct sas_task *task = ccb->task; struct domain_device *dev = task->dev; struct pm8001_device *pm8001_dev = dev->lldd_dev; - struct scatterlist *sg_req, *sg_resp; + struct scatterlist *sg_req, *sg_resp, *smp_req; u32 req_len, resp_len; struct smp_req smp_cmd; u32 opc; struct inbound_queue_table *circularQ; - char *preq_dma_addr = NULL; - __le64 tmp_addr; u32 i, length; + u8 *payload; + u8 *to; memset(&smp_cmd, 0, sizeof(smp_cmd)); /* @@ -4280,8 +4283,9 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_ha->smp_exp_mode = SMP_INDIRECT; - tmp_addr = cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req)); - preq_dma_addr = (char *)phys_to_virt(tmp_addr); + smp_req = &task->smp_task.smp_req; + to = kmap_atomic(sg_page(smp_req)); + payload = to + smp_req->offset; /* INDIRECT MODE command settings. Use DMA */ if (pm8001_ha->smp_exp_mode == SMP_INDIRECT) { @@ -4289,7 +4293,7 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, /* for SPCv indirect mode. Place the top 4 bytes of * SMP Request header here. */ for (i = 0; i < 4; i++) - smp_cmd.smp_req16[i] = *(preq_dma_addr + i); + smp_cmd.smp_req16[i] = *(payload + i); /* exclude top 4 bytes for SMP req header */ smp_cmd.long_smp_req.long_req_addr = cpu_to_le64((u64)sg_dma_address @@ -4320,20 +4324,20 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, IO, "SMP REQUEST DIRECT MODE\n"); for (i = 0; i < length; i++) if (i < 16) { - smp_cmd.smp_req16[i] = *(preq_dma_addr+i); + smp_cmd.smp_req16[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req16[i], - *(preq_dma_addr)); + *(payload)); } else { - smp_cmd.smp_req[i] = *(preq_dma_addr+i); + smp_cmd.smp_req[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req[i], - *(preq_dma_addr)); + *(payload)); } } - + kunmap_atomic(to); build_smp_cmd(pm8001_dev->device_id, smp_cmd.tag, &smp_cmd, pm8001_ha->smp_exp_mode, length); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &smp_cmd, From fea3fdf975dd9f3e5248afaab8fe023db313f005 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 14 Dec 2021 09:41:26 +0800 Subject: [PATCH 122/361] drm/ast: potential dereference of null pointer The return value of kzalloc() needs to be checked. To avoid use of null pointer '&ast_state->base' in case of the failure of alloc. Fixes: f0adbc382b8b ("drm/ast: Allocate initial CRTC state of the correct size") Signed-off-by: Jiasheng Jiang Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211214014126.2211535-1-jiasheng@iscas.ac.cn --- drivers/gpu/drm/ast/ast_mode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1e30eaeb0e1b3..d5c98f79d58d3 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1121,7 +1121,10 @@ static void ast_crtc_reset(struct drm_crtc *crtc) if (crtc->state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + if (ast_state) + __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static struct drm_crtc_state * From 83b67041f3eaf33f98a075249aa7f4c7617c2f85 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Nov 2021 10:43:48 +0100 Subject: [PATCH 123/361] USB: serial: cp210x: fix CP2105 GPIO registration When generalising GPIO support and adding support for CP2102N, the GPIO registration for some CP2105 devices accidentally broke. Specifically, when all the pins of a port are in "modem" mode, and thus unavailable for GPIO use, the GPIO chip would now be registered without having initialised the number of GPIO lines. This would in turn be rejected by gpiolib and some errors messages would be printed (but importantly probe would still succeed). Fix this by initialising the number of GPIO lines before registering the GPIO chip. Note that as for the other device types, and as when all CP2105 pins are muxed for LED function, the GPIO chip is registered also when no pins are available for GPIO use. Reported-by: Maarten Brock Link: https://lore.kernel.org/r/5eb560c81d2ea1a2b4602a92d9f48a89@vanmierlo.com Fixes: c8acfe0aadbe ("USB: serial: cp210x: implement GPIO support for CP2102N") Cc: stable@vger.kernel.org # 4.19 Cc: Karoly Pados Link: https://lore.kernel.org/r/20211126094348.31698-1-johan@kernel.org Reviewed-by: Greg Kroah-Hartman Tested-by: Maarten Brock Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7705328034cab..8a60c0d56863e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1635,6 +1635,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { + priv->gc.ngpio = 2; + if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1645,8 +1647,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 2; } else if (intf_num == 1) { + priv->gc.ngpio = 3; + if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1657,7 +1660,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 3; } else { return -ENODEV; } From 2b503c8598d1b232e7fc7526bce9326d92331541 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 11:07:14 +0100 Subject: [PATCH 124/361] USB: serial: option: add Telit FN990 compositions Add the following Telit FN990 compositions: 0x1070: tty, adb, rmnet, tty, tty, tty, tty 0x1071: tty, adb, mbim, tty, tty, tty, tty 0x1072: rndis, tty, adb, tty, tty, tty, tty 0x1073: tty, adb, ecm, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20211210100714.22587-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 546fce4617a85..42420bfc983c2 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1219,6 +1219,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 53b3495273282aa844c4613d19c3b30558c70c84 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Thu, 9 Dec 2021 20:41:24 -0800 Subject: [PATCH 125/361] drm/i915/display: Fix an unsigned subtraction which can never be negative. smatch warning: drivers/gpu/drm/i915/display/intel_dmc.c:601 parse_dmc_fw() warn: unsigned 'fw->size - offset' is never less than zero Firmware size is size_t and offset is u32. So the subtraction is unsigned which can never be less than zero. Fixes: 3d5928a168a9 ("drm/i915/xelpd: Pipe A DMC plugging") Signed-off-by: Harshit Mogalapalli Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20211210044129.12422-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 87bb2a410dcfb617b88e4695edf4beb6336dc314) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 2dc9d632969db..aef69522f0be3 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -596,7 +596,7 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, continue; offset = readcount + dmc->dmc_info[id].dmc_offset * 4; - if (fw->size - offset < 0) { + if (offset > fw->size) { drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); continue; } From d296a74b7b59ff9116236c17edb25f26935dbf70 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 10:49:39 -0500 Subject: [PATCH 126/361] ALSA: hda/realtek: Amp init fixup for HP ZBook 15 G6 HP ZBook 15 G6 (SSID 103c:860f) needs the same speaker amplifier initialization as used on several other HP laptops using ALC285. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213154938.503201-1-Bradley.Scott@zebra.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3599f4c85ebf7..d162662fe6846 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8660,6 +8660,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), From aa72394667e5cea3547e4c41ddff7ca8c632d764 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 11:22:47 -0500 Subject: [PATCH 127/361] ALSA: hda/realtek: Add new alc285-hp-amp-init model Adds a new "alc285-hp-amp-init" model that can be used to apply the ALC285 HP speaker amplifier initialization fixup to devices that are not already known by passing "hda_model=alc285-hp-amp-init" to the snd-sof-intel-hda-common module or "model=alc285-hp-amp-init" to the snd-hda-intel module, depending on which is being used. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213162246.506838-1-bscott@teksavvy.com Signed-off-by: Takashi Iwai --- Documentation/sound/hd-audio/models.rst | 2 ++ sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 3 insertions(+) diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 0ea967d345838..d25335993e553 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -326,6 +326,8 @@ usi-headset Headset support on USI machines dual-codecs Lenovo laptops with dual codecs +alc285-hp-amp-init + HP laptops which require speaker amplifier initialization (ALC285) ALC680 ====== diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d162662fe6846..fc41f3e8ddc3c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9124,6 +9124,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, + {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {} }; #define ALC225_STANDARD_PINS \ From 1fe98f5690c4219d419ea9cc190f94b3401cf324 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Dec 2021 13:45:33 +0100 Subject: [PATCH 128/361] mac80211: send ADDBA requests using the tid/queue of the aggregation session Sending them out on a different queue can cause a race condition where a number of packets in the queue may be discarded by the receiver, because the ADDBA request is sent too early. This affects any driver with software A-MPDU setup which does not allocate packet seqno in hardware on tx, regardless of whether iTXQ is used or not. The only driver I've seen that explicitly deals with this issue internally is mwl8k. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211202124533.80388-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c1558dd2d2443..58761ca7da3c5 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) From 37d33114240ede043c42463a6347f68ed72d6904 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Wed, 1 Dec 2021 13:49:18 +0100 Subject: [PATCH 129/361] nl80211: remove reload flag from regulatory_request This removes the previously unused reload flag, which was introduced in 1eda919126b4. The request is handled as NL80211_REGDOM_SET_BY_CORE, which is parsed unconditionally. Reported-by: kernel test robot Reported-by: Nathan Chancellor Fixes: 1eda919126b4 ("nl80211: reset regdom when reloading regdb") Link: https://lore.kernel.org/all/YaZuKYM5bfWe2Urn@archlinux-ax161/ Signed-off-by: Finn Behrens Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/YadvTolO8rQcNCd/@gimli.kloenk.dev Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 - net/wireless/reg.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 0cf9335431e07..47f06f6f5a67c 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,7 +83,6 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; - bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 61f1bf1bc4a73..8148a3b5f6071 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1134,9 +1134,8 @@ int reg_reload_regdb(void) request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = current_regdomain->alpha2[0]; request->alpha2[1] = current_regdomain->alpha2[1]; - request->initiator = NL80211_USER_REG_HINT_USER; + request->initiator = NL80211_REGDOM_SET_BY_CORE; request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; - request->reload = true; reg_process_hint(request); @@ -2712,8 +2711,7 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - (treatment == REG_REQ_ALREADY_SET && - !user_request->reload)) + treatment == REG_REQ_ALREADY_SET) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; From 06c41bda0ea14aa7fba932a9613c4ee239682cf0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Dec 2021 15:26:25 +0200 Subject: [PATCH 130/361] mac80211: agg-tx: don't schedule_and_wake_txq() under sta->lock When we call ieee80211_agg_start_txq(), that will in turn call schedule_and_wake_txq(). Called from ieee80211_stop_tx_ba_cb() this is done under sta->lock, which leads to certain circular lock dependencies, as reported by Chris Murphy: https://lore.kernel.org/r/CAJCQCtSXJ5qA4bqSPY=oLRMbv-irihVvP7A2uGutEbXQVkoNaw@mail.gmail.com In general, ieee80211_agg_start_txq() is usually not called with sta->lock held, only in this one place. But it's always called with sta->ampdu_mlme.mtx held, and that's therefore clearly sufficient. Change ieee80211_stop_tx_ba_cb() to also call it without the sta->lock held, by factoring it out of ieee80211_remove_tid_tx() (which is only called in this one place). This breaks the locking chain and makes it less likely that we'll have similar locking chain problems in the future. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Reported-by: Chris Murphy Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152554.f519884c8784.I555fef8e67d93fff3d9a304886c4a9f8b322e591@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 58761ca7da3c5..74a878f213d3e 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #include @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -889,6 +890,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -906,10 +908,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); From e08ebd6d7b90ae81f21425ca39136f5b2272580f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 2 Dec 2021 15:28:54 +0200 Subject: [PATCH 131/361] cfg80211: Acquire wiphy mutex on regulatory work The function cfg80211_reg_can_beacon_relax() expects wiphy mutex to be held when it is being called. However, when reg_leave_invalid_chans() is called the mutex is not held. Fix it by acquiring the lock before calling the function. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152831.527686cda037.I40ad9372a47cbad53b4aae7b5a6ccc0dc3fddf8b@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8148a3b5f6071..f8f01a3e020ba 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2359,6 +2359,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; + bool ret; wdev_lock(wdev); iftype = wdev->iftype; @@ -2408,7 +2409,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_lock(wiphy); + ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_unlock(wiphy); + + return ret; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, From 768c0b19b50665e337c96858aa2b7928d6dcf756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 11 Dec 2021 20:10:24 +0100 Subject: [PATCH 132/361] mac80211: validate extended element ID is present Before attempting to parse an extended element, verify that the extended element ID is present. Fixes: 41cbb0f5a295 ("mac80211: add support for HE") Reported-by: syzbot+59bdff68edce82e393b6@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20211211201023.f30a1b128c07.I5cacc176da94ba316877c6e10fe3ceec8b4dbd7d@changeid Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 43df2f0c5db9c..6c2934854d3ce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -943,7 +943,12 @@ static void ieee80211_parse_extension_element(u32 *crc, struct ieee802_11_elems *elems) { const void *data = elem->data + 1; - u8 len = elem->datalen - 1; + u8 len; + + if (!elem->datalen) + return; + + len = elem->datalen - 1; switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: From 511ab0c1dfb260a6b17b8771109e8d63474473a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:46 +0200 Subject: [PATCH 133/361] mac80211: fix lookup when adding AddBA extension element We should be doing the HE capabilities lookup based on the full interface type so if P2P doesn't have HE but client has it doesn't get confused. Fix that. Fixes: 2ab45876756f ("mac80211: add support for the ADDBA extension element") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.010fc1d61137.If3a468145f29d670cb00a693bed559d8290ba693@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 470ff0ce3dc76..7d2925bb966e0 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; From f22d981386d12d1513bd2720fb4387b469124d4b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:45 +0200 Subject: [PATCH 134/361] mac80211: Fix the size used for building probe request Instead of using the hard-coded value of '100' use the correct scan IEs length as calculated during HW registration to mac80211. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.0a82d6891719.I8ded1f2e0bccb9e71222c945666bcd86537f2e35@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6c2934854d3ce..fe2903b84cebf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2068,7 +2068,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, chandef.chan = chan; skb = ieee80211_probereq_get(&local->hw, src, ssid, ssid_len, - 100 + ie_len); + local->scan_ies_len + ie_len); if (!skb) return NULL; From 4dde3c3627b52ca515a34f6f4de3898224aa1dd3 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Mon, 29 Nov 2021 15:32:42 +0200 Subject: [PATCH 135/361] mac80211: update channel context before station state Currently channel context is updated only after station got an update about new assoc state, this results in station using the old channel context. Fix this by moving the update channel context before updating station, enabling the driver to immediately use the updated channel context in the new assoc state. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.1c80c17ffd8a.I94ae31378b363c1182cfdca46c4b7e7165cff984@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 840ad1a860fa2..537535a88990c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -667,6 +667,15 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) list_add_tail_rcu(&sta->list, &local->sta_list); + /* update channel context before notifying the driver about state + * change, this enables driver using the updated channel context right away. + */ + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) @@ -674,12 +683,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); - if (sta->sta_state >= IEEE80211_STA_ASSOC) { - ieee80211_recalc_min_chandef(sta->sdata); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } - /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); From db7205af049d230e7e0abf61c1e74c1aab40f390 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:39 +0200 Subject: [PATCH 136/361] mac80211: mark TX-during-stop for TX in in_reconfig Mark TXQs as having seen transmit while they were stopped if we bail out of drv_wake_tx_queue() due to reconfig, so that the queue wake after this will make them catch up. This is particularly necessary for when TXQs are used for management packets since those TXQs won't see a lot of traffic that'd make them catch up later. Cc: stable@vger.kernel.org Fixes: 4856bfd23098 ("mac80211: do not call driver wake_tx_queue op during reconfig") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4573a221c0e1.I0d1d5daea3089be3fc0dccc92991b0f8c5677f0c@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cd3731cbf6c68..c336267f4599c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1219,8 +1219,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; From 13dee10b30c058ee2c58c5da00339cc0d4201aa6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:40 +0200 Subject: [PATCH 137/361] mac80211: do drv_reconfig_complete() before restarting all When we reconfigure, the driver might do some things to complete the reconfiguration. It's strange and could be broken in some cases because we restart other works (e.g. remain-on-channel and TX) before this happens, yet only start queues later. Change this to do the reconfig complete when reconfiguration is actually complete, not when we've already started doing other things again. For iwlwifi, this should fix a race where the reconfig can race with TX, for ath10k and ath11k that also use this it won't make a difference because they just start queues there, and mac80211 also stopped the queues and will restart them later as before. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.cab99f22fe19.Iefe494687f15fd85f77c1b989d1149c8efdfdc36@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fe2903b84cebf..0e4e1956bcea1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2651,6 +2651,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } + /* + * If this is for hw restart things are still running. + * We may want to change that later, however. + */ + if (local->open_count && (!suspended || reconfig_due_to_wowlan)) + drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); + if (local->in_reconfig) { local->in_reconfig = false; barrier(); @@ -2669,13 +2676,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); - /* - * If this is for hw restart things are still running. - * We may want to change that later, however. - */ - if (local->open_count && (!suspended || reconfig_due_to_wowlan)) - drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); - if (!suspended) return 0; From 8734b41b3efe0fc6082c1937b0e88556c396dc96 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 23 Nov 2021 18:15:20 +1000 Subject: [PATCH 138/361] powerpc/module_64: Fix livepatching for RO modules Livepatching a loaded module involves applying relocations through apply_relocate_add(), which attempts to write to read-only memory when CONFIG_STRICT_MODULE_RWX=y. Work around this by performing these writes through the text poke area by using patch_instruction(). R_PPC_REL24 is the only relocation type generated by the kpatch-build userspace tool or klp-convert kernel tree that I observed applying a relocation to a post-init module. A more comprehensive solution is planned, but using patch_instruction() for R_PPC_REL24 on should serve as a sufficient fix. This does have a performance impact, I observed ~15% overhead in module_load() on POWER8 bare metal with checksum verification off. Fixes: c35717c71e98 ("powerpc: Set ARCH_HAS_STRICT_MODULE_RWX") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Joe Lawrence Signed-off-by: Russell Currey Tested-by: Joe Lawrence [mpe: Check return codes from patch_instruction()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211214121248.777249-1-mpe@ellerman.id.au --- arch/powerpc/kernel/module_64.c | 42 ++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6baa676e7cb60..5d77d3f5fbb56 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, const char *name) { long reladdr; + func_desc_t desc; + int i; if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); + for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + if (patch_instruction(&entry->jump[i], + ppc_inst(ppc64_stub_insns[i]))) + return 0; + } /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, } pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - entry->jump[0] |= PPC_HA(reladdr); - entry->jump[1] |= PPC_LO(reladdr); - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + + // func_desc_t is 8 bytes if ABIv2, else 16 bytes + desc = func_desc(addr); + for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { + if (patch_instruction(((u32 *)&entry->funcdata) + i, + ppc_inst(((u32 *)(&desc))[i]))) + return 0; + } + + if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + return 0; return 1; } @@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) me->name, *instruction, instruction); return 0; } + /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = PPC_INST_LD_TOC; + if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) + return 0; + return 1; } @@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } /* Only replace bits 2 through 26 */ - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | (value & 0x03fffffc); + + if (patch_instruction((u32 *)location, ppc_inst(value))) + return -EFAULT; + break; case R_PPC64_REL64: From 83dbf898a2d45289be875deb580e93050ba67529 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 14 Dec 2021 12:49:32 +0100 Subject: [PATCH 139/361] PCI/MSI: Mask MSI-X vectors only on success MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Masking all unused MSI-X entries is done to ensure that a crash kernel starts from a clean slate, which correponds to the reset state of the device as defined in the PCI-E specificion 3.0 and later: Vector Control for MSI-X Table Entries -------------------------------------- "00: Mask bit: When this bit is set, the function is prohibited from sending a message using this MSI-X Table entry. ... This bit’s state after reset is 1 (entry is masked)." A Marvell NVME device fails to deliver MSI interrupts after trying to enable MSI-X interrupts due to that masking. It seems to take the MSI-X mask bits into account even when MSI-X is disabled. While not specification compliant, this can be cured by moving the masking into the success path, so that the MSI-X table entries stay in device reset state when the MSI-X setup fails. [ tglx: Move it into the success path, add comment and amend changelog ] Fixes: aa8092c1d1f1 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Stefan Roese Signed-off-by: Thomas Gleixner Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211210161025.3287927-1-sr@denx.de --- drivers/pci/msi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 48e3f4e47b293..6748cf9d7d90b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -722,9 +722,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, goto out_disable; } - /* Ensure that all table entries are masked. */ - msix_mask_all(base, tsize); - ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -751,6 +748,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; + + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); From 94185adbfad56815c2c8401e16d81bdb74a79201 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Dec 2021 12:42:14 +0100 Subject: [PATCH 140/361] PCI/MSI: Clear PCI_MSIX_FLAGS_MASKALL on error PCI_MSIX_FLAGS_MASKALL is set in the MSI-X control register at MSI-X interrupt setup time. It's cleared on success, but the error handling path only clears the PCI_MSIX_FLAGS_ENABLE bit. That's incorrect as the reset state of the PCI_MSIX_FLAGS_MASKALL bit is zero. That can be observed via lspci: Capabilities: [b0] MSI-X: Enable- Count=67 Masked+ Clear the bit in the error path to restore the reset state. Fixes: 438553958ba1 ("PCI/MSI: Enable and mask MSI-X early") Reported-by: Stefan Roese Signed-off-by: Thomas Gleixner Tested-by: Stefan Roese Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87tufevoqx.ffs@tglx --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 6748cf9d7d90b..d84cf30bb2790 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -784,7 +784,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, free_msi_irqs(dev); out_disable: - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); return ret; } From aeb7c75cb77478fdbf821628e9c95c4baa9adc63 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Sat, 11 Dec 2021 22:51:34 +0800 Subject: [PATCH 141/361] net: stmmac: fix tc flower deletion for VLAN priority Rx steering To replicate the issue:- 1) Add 1 flower filter for VLAN Priority based frame steering:- $ IFDEVNAME=eth0 $ tc qdisc add dev $IFDEVNAME ingress $ tc qdisc add dev $IFDEVNAME root mqprio num_tc 8 \ map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0 $ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \ flower vlan_prio 0 hw_tc 0 2) Get the 'pref' id $ tc filter show dev $IFDEVNAME ingress 3) Delete a specific tc flower record (say pref 49151) $ tc filter del dev $IFDEVNAME parent ffff: pref 49151 From dmesg, we will observe kernel NULL pointer ooops [ 197.170464] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 197.171367] #PF: supervisor read access in kernel mode [ 197.171367] #PF: error_code(0x0000) - not-present page [ 197.171367] PGD 0 P4D 0 [ 197.171367] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 197.171367] RIP: 0010:tc_setup_cls+0x20b/0x4a0 [stmmac] [ 197.171367] Call Trace: [ 197.171367] [ 197.171367] ? __stmmac_disable_all_queues+0xa8/0xe0 [stmmac] [ 197.171367] stmmac_setup_tc_block_cb+0x70/0x110 [stmmac] [ 197.171367] tc_setup_cb_destroy+0xb3/0x180 [ 197.171367] fl_hw_destroy_filter+0x94/0xc0 [cls_flower] The above issue is due to previous incorrect implementation of tc_del_vlan_flow(), shown below, that uses flow_cls_offload_flow_rule() to get struct flow_rule *rule which is no longer valid for tc filter delete operation. struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; So, to ensure tc_del_vlan_flow() deletes the right VLAN cls record for earlier configured RX queue (configured by hw_tc) in tc_add_vlan_flow(), this patch introduces stmmac_rfs_entry as driver-side flow_cls_offload record for 'RX frame steering' tc flower, currently used for VLAN priority. The implementation has taken consideration for future extension to include other type RX frame steering such as EtherType based. v2: - Clean up overly extensive backtrace and rewrite git message to better explain the kernel NULL pointer issue. Fixes: 0e039f5cf86c ("net: stmmac: add RX frame steering based on VLAN priority in tc flower") Tested-by: Kurt Kanzenbach Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 17 ++++ .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 86 ++++++++++++++++--- 2 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5f129733aabd2..873b9e3e5da25 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -172,6 +172,19 @@ struct stmmac_flow_entry { int is_l4; }; +/* Rx Frame Steering */ +enum stmmac_rfs_type { + STMMAC_RFS_T_VLAN, + STMMAC_RFS_T_MAX, +}; + +struct stmmac_rfs_entry { + unsigned long cookie; + int in_use; + int type; + int tc; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; @@ -289,6 +302,10 @@ struct stmmac_priv { struct stmmac_tc_entry *tc_entries; unsigned int flow_entries_max; struct stmmac_flow_entry *flow_entries; + unsigned int rfs_entries_max[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_cnt[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_total; + struct stmmac_rfs_entry *rfs_entries; /* Pulse Per Second output */ struct stmmac_pps_cfg pps[STMMAC_PPS_MAX]; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1c4ea0b1b845b..d0a2b289f4603 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -232,11 +232,33 @@ static int tc_setup_cls_u32(struct stmmac_priv *priv, } } +static int tc_rfs_init(struct stmmac_priv *priv) +{ + int i; + + priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8; + + for (i = 0; i < STMMAC_RFS_T_MAX; i++) + priv->rfs_entries_total += priv->rfs_entries_max[i]; + + priv->rfs_entries = devm_kcalloc(priv->device, + priv->rfs_entries_total, + sizeof(*priv->rfs_entries), + GFP_KERNEL); + if (!priv->rfs_entries) + return -ENOMEM; + + dev_info(priv->device, "Enabled RFS Flow TC (entries=%d)\n", + priv->rfs_entries_total); + + return 0; +} + static int tc_init(struct stmmac_priv *priv) { struct dma_features *dma_cap = &priv->dma_cap; unsigned int count; - int i; + int ret, i; if (dma_cap->l3l4fnum) { priv->flow_entries_max = dma_cap->l3l4fnum; @@ -250,10 +272,14 @@ static int tc_init(struct stmmac_priv *priv) for (i = 0; i < priv->flow_entries_max; i++) priv->flow_entries[i].idx = i; - dev_info(priv->device, "Enabled Flow TC (entries=%d)\n", + dev_info(priv->device, "Enabled L3L4 Flow TC (entries=%d)\n", priv->flow_entries_max); } + ret = tc_rfs_init(priv); + if (ret) + return -ENOMEM; + if (!priv->plat->fpe_cfg) { priv->plat->fpe_cfg = devm_kzalloc(priv->device, sizeof(*priv->plat->fpe_cfg), @@ -607,16 +633,45 @@ static int tc_del_flow(struct stmmac_priv *priv, return ret; } +static struct stmmac_rfs_entry *tc_find_rfs(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + bool get_free) +{ + int i; + + for (i = 0; i < priv->rfs_entries_total; i++) { + struct stmmac_rfs_entry *entry = &priv->rfs_entries[i]; + + if (entry->cookie == cls->cookie) + return entry; + if (get_free && entry->in_use == false) + return entry; + } + + return NULL; +} + #define VLAN_PRIO_FULL_MASK (0x07) static int tc_add_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; int tc = tc_classid_to_hwtc(priv->dev, cls->classid); struct flow_match_vlan match; + if (!entry) { + entry = tc_find_rfs(priv, cls, true); + if (!entry) + return -ENOENT; + } + + if (priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN] >= + priv->rfs_entries_max[STMMAC_RFS_T_VLAN]) + return -ENOENT; + /* Nothing to do here */ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) return -EINVAL; @@ -638,6 +693,12 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, prio = BIT(match.key->vlan_priority); stmmac_rx_queue_prio(priv, priv->hw, prio, tc); + + entry->in_use = true; + entry->cookie = cls->cookie; + entry->tc = tc; + entry->type = STMMAC_RFS_T_VLAN; + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]++; } return 0; @@ -646,20 +707,19 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, static int tc_del_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); - struct flow_dissector *dissector = rule->match.dissector; - int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); - /* Nothing to do here */ - if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) - return -EINVAL; + if (!entry || !entry->in_use || entry->type != STMMAC_RFS_T_VLAN) + return -ENOENT; - if (tc < 0) { - netdev_err(priv->dev, "Invalid traffic class\n"); - return -EINVAL; - } + stmmac_rx_queue_prio(priv, priv->hw, 0, entry->tc); + + entry->in_use = false; + entry->cookie = 0; + entry->tc = 0; + entry->type = 0; - stmmac_rx_queue_prio(priv, priv->hw, 0, tc); + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]--; return 0; } From 4fc7261dbab139d3c64c3b618262504e16cfe7ee Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Tue, 14 Dec 2021 17:06:53 +0530 Subject: [PATCH 142/361] mmc: sdhci-tegra: Fix switch to HS400ES mode When CMD13 is sent after switching to HS400ES mode, the bus is operating at either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host controller CAR clock and the interface clock are rate matched. Signed-off-by: Prathamesh Shete Acked-by: Adrian Hunter Fixes: dfc9700cef77 ("mmc: tegra: Implement HS400 enhanced strobe") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211214113653.4631-1-pshete@nvidia.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-tegra.c | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index a5001875876b9..9762ffab2e236 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -356,23 +356,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } -static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, - struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - u32 val; - - val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - - if (ios->enhanced_strobe) - val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - else - val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - - sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - -} - static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -793,6 +776,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } +static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 val; + + val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); + + if (ios->enhanced_strobe) { + val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + /* + * When CMD13 is sent from mmc_select_hs400es() after + * switching to HS400ES mode, the bus is operating at + * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. + * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI + * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host + * controller CAR clock and the interface clock are rate matched. + */ + tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR); + } else { + val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + } + + sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); +} + static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); From 166b6a46b78bf8b9559a6620c3032f9fe492e082 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Mon, 13 Dec 2021 15:46:04 +0100 Subject: [PATCH 143/361] flow_offload: return EOPNOTSUPP for the unsupported mpls action type We need to return EOPNOTSUPP for the unsupported mpls action type when setup the flow action. In the original implement, we will return 0 for the unsupported mpls action type, actually we do not setup it and the following actions to the flow action entry. Fixes: 9838b20a7fb2 ("net: sched: take rtnl lock in tc_setup_flow_action()") Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ef8f5a6205a9..e54f0a42270c1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3687,6 +3687,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: + err = -EOPNOTSUPP; goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { From 5f9562ebe710c307adc5f666bf1a2162ee7977c0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 14 Dec 2021 18:46:59 +0800 Subject: [PATCH 144/361] rds: memory leak in __rds_conn_create() __rds_conn_create() did not release conn->c_path when loop_trans != 0 and trans->t_prefer_loopback != 0 and is_outgoing == 0. Fixes: aced3ce57cd3 ("RDS tcp loopback connection can hang") Signed-off-by: Hangyu Hua Reviewed-by: Sharath Srinivasan Signed-off-by: David S. Miller --- net/rds/connection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/connection.c b/net/rds/connection.c index a3bc4b54d4910..b4cc699c5fad3 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * should end up here, but if it * does, reset/destroy the connection. */ + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(-EOPNOTSUPP); goto out; From b442f2ea84624873d538e4c5986d7c0d40883a47 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:36 +0200 Subject: [PATCH 145/361] mlxsw: spectrum_router: Consolidate MAC profiles when possible Currently, when setting a router interface (RIF) MAC address while the MAC profile is not shared with other RIFs, the profile is edited so that the new MAC address is assigned to it. This does not take into account a situation in which the new MAC address already matches an existing MAC profile. In that situation, two MAC profiles will be occupied even though they hold MAC addresses from the same profile. In order to prevent that, add a check to ensure that editing a MAC profile takes place only when the new MAC address does not match an existing profile. Fixes: 605d25cd782a6 ("mlxsw: spectrum_router: Add RIF MAC profiles support") Reported-by: Maksym Yaremchuk Tested-by: Maksym Yaremchuk Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 217e3b351dfe6..c34833ff1dded 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8494,7 +8494,8 @@ mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp, u8 mac_profile; int err; - if (!mlxsw_sp_rif_mac_profile_is_shared(rif)) + if (!mlxsw_sp_rif_mac_profile_is_shared(rif) && + !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac)) return mlxsw_sp_rif_mac_profile_edit(rif, new_mac); err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac, From 20617717cd219d3c1f798cd13dbce1bcd86a6ece Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:37 +0200 Subject: [PATCH 146/361] selftests: mlxsw: Add a test case for MAC profiles consolidation Add a test case to cover the bug fixed by the previous patch. Edit the MAC address of one netdev so that it matches the MAC address of the second netdev. Verify that the two MAC profiles were consolidated by testing that the MAC profiles occupancy decreased by one. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/rif_mac_profiles_occ.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh index b513f64d9092d..026a126f584d7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh @@ -72,6 +72,35 @@ rif_mac_profile_replacement_test() ip link set $h1.10 address $h1_10_mac } +rif_mac_profile_consolidation_test() +{ + local count=$1; shift + local h1_20_mac + + RET=0 + + if [[ $count -eq 1 ]]; then + return + fi + + h1_20_mac=$(mac_get $h1.20) + + # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are + # using the same MAC profile. + ip link set $h1.20 address 00:11:11:11:11:11 + check_err $? + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $((count - 1)) ]] + check_err $? "MAC profile occupancy did not decrease" + + log_test "RIF MAC profile consolidation" + + ip link set $h1.20 address $h1_20_mac +} + rif_mac_profile_shared_replacement_test() { local count=$1; shift @@ -104,6 +133,7 @@ rif_mac_profile_edit_test() create_max_rif_mac_profiles $count rif_mac_profile_replacement_test + rif_mac_profile_consolidation_test $count rif_mac_profile_shared_replacement_test $count } From 8deb34a90f06374fd26f722c2a79e15160f66be7 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 14 Dec 2021 18:50:33 +0800 Subject: [PATCH 147/361] ASoC: rt5682: fix the wrong jack type detected Some powers were changed during the jack insert detection and clk's enable/disable in CCF. If in parallel, the influence has a chance to detect the wrong jack type, so add a lock. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20211214105033.471-1-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 5224123d0d3bb..b34a8542077dc 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -929,6 +929,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) unsigned int val, count; if (jack_insert) { + snd_soc_dapm_mutex_lock(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -979,6 +981,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); + + snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, From edaa26334c117a584add6053f48d63a988d25a6e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Dec 2021 14:14:43 -1000 Subject: [PATCH 148/361] iocost: Fix divide-by-zero on donation from low hweight cgroup The donation calculation logic assumes that the donor has non-zero after-donation hweight, so the lowest active hweight a donating cgroup can have is 2 so that it can donate 1 while keeping the other 1 for itself. Earlier, we only donated from cgroups with sizable surpluses so this condition was always true. However, with the precise donation algorithm implemented, f1de2439ec43 ("blk-iocost: revamp donation amount determination") made the donation amount calculation exact enabling even low hweight cgroups to donate. This means that in rare occasions, a cgroup with active hweight of 1 can enter donation calculation triggering the following warning and then a divide-by-zero oops. WARNING: CPU: 4 PID: 0 at block/blk-iocost.c:1928 transfer_surpluses.cold+0x0/0x53 [884/94867] ... RIP: 0010:transfer_surpluses.cold+0x0/0x53 Code: 92 ff 48 c7 c7 28 d1 ab b5 65 48 8b 34 25 00 ae 01 00 48 81 c6 90 06 00 00 e8 8b 3f fe ff 48 c7 c0 ea ff ff ff e9 95 ff 92 ff <0f> 0b 48 c7 c7 30 da ab b5 e8 71 3f fe ff 4c 89 e8 4d 85 ed 74 0 4 ... Call Trace: ioc_timer_fn+0x1043/0x1390 call_timer_fn+0xa1/0x2c0 __run_timers.part.0+0x1ec/0x2e0 run_timer_softirq+0x35/0x70 ... iocg: invalid donation weights in /a/b: active=1 donating=1 after=0 Fix it by excluding cgroups w/ active hweight < 2 from donating. Excluding these extreme low hweight donations shouldn't affect work conservation in any meaningful way. Signed-off-by: Tejun Heo Fixes: f1de2439ec43 ("blk-iocost: revamp donation amount determination") Cc: stable@vger.kernel.org # v5.10+ Link: https://lore.kernel.org/r/Ybfh86iSvpWKxhVM@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5b37cc65b171..769b643942989 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2311,7 +2311,14 @@ static void ioc_timer_fn(struct timer_list *timer) hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); - if (new_hwi < hwm) { + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); From bd0687c18e635b63233dc87f38058cd728802ab4 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 14 Dec 2021 11:26:07 +0100 Subject: [PATCH 149/361] xsk: Do not sleep in poll() when need_wakeup set Do not sleep in poll() when the need_wakeup flag is set. When this flag is set, the application needs to explicitly wake up the driver with a syscall (poll, recvmsg, sendmsg, etc.) to guarantee that Rx and/or Tx processing will be processed promptly. But the current code in poll(), sleeps first then wakes up the driver. This means that no driver processing will occur (baring any interrupts) until the timeout has expired. Fix this by checking the need_wakeup flag first and if set, wake the driver and return to the application. Only if need_wakeup is not set should the process sleep if there is a timeout set in the poll() call. Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Reported-by: Keith Wiles Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20211214102607.7677-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f16074eb53c72..7a466ea962c57 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -677,8 +677,6 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; - sock_poll_wait(file, sock, wait); - if (unlikely(!xsk_is_bound(xs))) return mask; @@ -690,6 +688,8 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); + } else { + sock_poll_wait(file, sock, wait); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) From f7abc4c8df8c7930d0b9c56d9abee9a1fca635e9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 14 Dec 2021 07:18:00 +0530 Subject: [PATCH 150/361] selftests/bpf: Fix OOB write in test_verifier The commit referenced below added fixup_map_timer support (to create a BPF map containing timers), but failed to increase the size of the map_fds array, leading to out of bounds write. Fix this by changing MAX_NR_MAPS to 22. Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211214014800.78762-1-memxor@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 465ef3f112c0c..d3bf83d5c6cff 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -54,7 +54,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 21 +#define MAX_NR_MAPS 22 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 From f35838a6930296fc1988764cfa54cb3f705c0665 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Thu, 9 Dec 2021 14:56:31 +0800 Subject: [PATCH 151/361] btrfs: fix memory leak in __add_inode_ref() Line 1169 (#3) allocates a memory chunk for victim_name by kmalloc(), but when the function returns in line 1184 (#4) victim_name allocated by line 1169 (#3) is not freed, which will lead to a memory leak. There is a similar snippet of code in this function as allocating a memory chunk for victim_name in line 1104 (#1) as well as releasing the memory in line 1116 (#2). We should kfree() victim_name when the return value of backref_in_log() is less than zero and before the function returns in line 1184 (#4). 1057 static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 1058 struct btrfs_root *root, 1059 struct btrfs_path *path, 1060 struct btrfs_root *log_root, 1061 struct btrfs_inode *dir, 1062 struct btrfs_inode *inode, 1063 u64 inode_objectid, u64 parent_objectid, 1064 u64 ref_index, char *name, int namelen, 1065 int *search_done) 1066 { 1104 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #1: kmalloc (victim_name-1) 1105 if (!victim_name) 1106 return -ENOMEM; 1112 ret = backref_in_log(log_root, &search_key, 1113 parent_objectid, victim_name, 1114 victim_name_len); 1115 if (ret < 0) { 1116 kfree(victim_name); // #2: kfree (victim_name-1) 1117 return ret; 1118 } else if (!ret) { 1169 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #3: kmalloc (victim_name-2) 1170 if (!victim_name) 1171 return -ENOMEM; 1180 ret = backref_in_log(log_root, &search_key, 1181 parent_objectid, victim_name, 1182 victim_name_len); 1183 if (ret < 0) { 1184 return ret; // #4: missing kfree (victim_name-2) 1185 } else if (!ret) { 1241 return 0; 1242 } Fixes: d3316c8233bb ("btrfs: Properly handle backref_in_log retval") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Jianglei Nie Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3e6f14e13918b..8778401665c31 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1181,6 +1181,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, parent_objectid, victim_name, victim_name_len); if (ret < 0) { + kfree(victim_name); return ret; } else if (!ret) { ret = -ENOENT; From 33fab972497ae66822c0b6846d4f9382938575b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 10 Dec 2021 19:02:18 +0000 Subject: [PATCH 152/361] btrfs: fix double free of anon_dev after failure to create subvolume When creating a subvolume, at create_subvol(), we allocate an anonymous device and later call btrfs_get_new_fs_root(), which in turn just calls btrfs_get_root_ref(). There we call btrfs_init_fs_root() which assigns the anonymous device to the root, but if after that call there's an error, when we jump to 'fail' label, we call btrfs_put_root(), which frees the anonymous device and then returns an error that is propagated back to create_subvol(). Than create_subvol() frees the anonymous device again. When this happens, if the anonymous device was not reallocated after the first time it was freed with btrfs_put_root(), we get a kernel message like the following: (...) [13950.282466] BTRFS: error (device dm-0) in create_subvol:663: errno=-5 IO failure [13950.283027] ida_free called for id=65 which is not allocated. [13950.285974] BTRFS info (device dm-0): forced readonly (...) If the anonymous device gets reallocated by another btrfs filesystem or any other kernel subsystem, then bad things can happen. So fix this by setting the root's anonymous device to 0 at btrfs_get_root_ref(), before we call btrfs_put_root(), if an error happened. Fixes: 2dfb1e43f57dd3 ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 847aabb306760..28449ca66dbd2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1732,6 +1732,14 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, } return root; fail: + /* + * If our caller provided us an anonymous device, then it's his + * responsability to free it in case we fail. So we have to set our + * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() + * and once again by our caller. + */ + if (anon_dev) + root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } From 1b2e5e5c7feabb4f3041f637b96494944da6aeff Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Dec 2021 11:29:01 +0000 Subject: [PATCH 153/361] btrfs: fix missing last dir item offset update when logging directory When logging a directory, once we finish processing a leaf that is full of dir items, if we find the next leaf was not modified in the current transaction, we grab the first key of that next leaf and log it as to mark the end of a key range boundary. However we did not update the value of ctx->last_dir_item_offset, which tracks the offset of the last logged key. This can result in subsequent logging of the same directory in the current transaction to not realize that key was already logged, and then add it to the middle of a batch that starts with a lower key, resulting later in a leaf with one key that is duplicated and at non-consecutive slots. When that happens we get an error later when writing out the leaf, reporting that there is a pair of keys in wrong order. The report is something like the following: Dec 13 21:44:50 kernel: BTRFS critical (device dm-0): corrupt leaf: root=18446744073709551610 block=118444032 slot=21, bad key order, prev (704687 84 4146773349) current (704687 84 1063561078) Dec 13 21:44:50 kernel: BTRFS info (device dm-0): leaf 118444032 gen 91449 total ptrs 39 free space 546 owner 18446744073709551610 Dec 13 21:44:50 kernel: item 0 key (704687 1 0) itemoff 3835 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35532 size 1026 mode 40755 Dec 13 21:44:50 kernel: item 1 key (704687 12 704685) itemoff 3822 itemsize 13 Dec 13 21:44:50 kernel: item 2 key (704687 24 3817753667) itemoff 3736 itemsize 86 Dec 13 21:44:50 kernel: item 3 key (704687 60 0) itemoff 3728 itemsize 8 Dec 13 21:44:50 kernel: item 4 key (704687 72 0) itemoff 3720 itemsize 8 Dec 13 21:44:50 kernel: item 5 key (704687 84 140445108) itemoff 3666 itemsize 54 Dec 13 21:44:50 kernel: dir oid 704793 type 1 Dec 13 21:44:50 kernel: item 6 key (704687 84 298800632) itemoff 3599 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707849 type 2 Dec 13 21:44:50 kernel: item 7 key (704687 84 476147658) itemoff 3532 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707901 type 2 Dec 13 21:44:50 kernel: item 8 key (704687 84 633818382) itemoff 3471 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704694 type 2 Dec 13 21:44:50 kernel: item 9 key (704687 84 654256665) itemoff 3403 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707841 type 1 Dec 13 21:44:50 kernel: item 10 key (704687 84 995843418) itemoff 3331 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167736 type 1 Dec 13 21:44:50 kernel: item 11 key (704687 84 1063561078) itemoff 3278 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 12 key (704687 84 1101156010) itemoff 3225 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704696 type 1 Dec 13 21:44:50 kernel: item 13 key (704687 84 2521936574) itemoff 3173 itemsize 52 Dec 13 21:44:50 kernel: dir oid 704704 type 2 Dec 13 21:44:50 kernel: item 14 key (704687 84 2618368432) itemoff 3112 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704738 type 1 Dec 13 21:44:50 kernel: item 15 key (704687 84 2676316190) itemoff 3046 itemsize 66 Dec 13 21:44:50 kernel: dir oid 2167729 type 1 Dec 13 21:44:50 kernel: item 16 key (704687 84 3319104192) itemoff 2986 itemsize 60 Dec 13 21:44:50 kernel: dir oid 704745 type 2 Dec 13 21:44:50 kernel: item 17 key (704687 84 3908046265) itemoff 2929 itemsize 57 Dec 13 21:44:50 kernel: dir oid 2167734 type 1 Dec 13 21:44:50 kernel: item 18 key (704687 84 3945713089) itemoff 2857 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167730 type 1 Dec 13 21:44:50 kernel: item 19 key (704687 84 4077169308) itemoff 2795 itemsize 62 Dec 13 21:44:50 kernel: dir oid 704688 type 1 Dec 13 21:44:50 kernel: item 20 key (704687 84 4146773349) itemoff 2727 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707892 type 1 Dec 13 21:44:50 kernel: item 21 key (704687 84 1063561078) itemoff 2674 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 22 key (704687 96 2) itemoff 2612 itemsize 62 Dec 13 21:44:50 kernel: item 23 key (704687 96 6) itemoff 2551 itemsize 61 Dec 13 21:44:50 kernel: item 24 key (704687 96 7) itemoff 2498 itemsize 53 Dec 13 21:44:50 kernel: item 25 key (704687 96 12) itemoff 2446 itemsize 52 Dec 13 21:44:50 kernel: item 26 key (704687 96 14) itemoff 2385 itemsize 61 Dec 13 21:44:50 kernel: item 27 key (704687 96 18) itemoff 2325 itemsize 60 Dec 13 21:44:50 kernel: item 28 key (704687 96 24) itemoff 2271 itemsize 54 Dec 13 21:44:50 kernel: item 29 key (704687 96 28) itemoff 2218 itemsize 53 Dec 13 21:44:50 kernel: item 30 key (704687 96 62) itemoff 2150 itemsize 68 Dec 13 21:44:50 kernel: item 31 key (704687 96 66) itemoff 2083 itemsize 67 Dec 13 21:44:50 kernel: item 32 key (704687 96 75) itemoff 2015 itemsize 68 Dec 13 21:44:50 kernel: item 33 key (704687 96 79) itemoff 1948 itemsize 67 Dec 13 21:44:50 kernel: item 34 key (704687 96 82) itemoff 1882 itemsize 66 Dec 13 21:44:50 kernel: item 35 key (704687 96 83) itemoff 1810 itemsize 72 Dec 13 21:44:50 kernel: item 36 key (704687 96 85) itemoff 1753 itemsize 57 Dec 13 21:44:50 kernel: item 37 key (704687 96 87) itemoff 1681 itemsize 72 Dec 13 21:44:50 kernel: item 38 key (704694 1 0) itemoff 1521 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35534 size 30 mode 40755 Dec 13 21:44:50 kernel: BTRFS error (device dm-0): block=118444032 write time tree block corruption detected So fix that by adding the missing update of ctx->last_dir_item_offset with the offset of the boundary key. Reported-by: Chris Murphy Link: https://lore.kernel.org/linux-btrfs/CAJCQCtT+RSzpUjbMq+UfzNUMe1X5+1G+DnAGbHC=OZ=iRS24jg@mail.gmail.com/ Fixes: dc2872247ec0ca ("btrfs: keep track of the last logged keys when logging a directory") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8778401665c31..6993dcdba6f1a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3978,6 +3978,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, goto done; } if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ctx->last_dir_item_offset = min_key.offset; ret = overwrite_item(trans, log, dst_path, path->nodes[0], path->slots[0], &min_key); From 80d5be1a057e05f01d66e986cfd34d71845e5190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Mon, 6 Dec 2021 22:45:43 +0000 Subject: [PATCH 154/361] ASoC: tas2770: Fix setting of high sample rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the codec advertises support for 176.4 and 192 ksps, without this fix setting those sample rates fails with EINVAL at hw_params time. Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20211206224529.74656-1-povik@protonmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 172e79cbe0daf..6549e7fef3e32 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -291,11 +291,11 @@ static int tas2770_set_samplerate(struct tas2770_priv *tas2770, int samplerate) ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_88_2_96KHZ; break; - case 19200: + case 192000: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_48KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; - case 17640: + case 176400: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; From 1bcd326631dc4faa3322d60b4fc45e8b3747993e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:03 +0100 Subject: [PATCH 155/361] ASoC: meson: aiu: fifo: Add missing dma_coerce_mask_and_coherent() The FIFO registers which take an DMA-able address are only 32-bit wide on AIU. Add dma_coerce_mask_and_coherent() to make the DMA core aware of this limitation. Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-fifo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/meson/aiu-fifo.c b/sound/soc/meson/aiu-fifo.c index 4ad23267cace5..d67ff4cdabd5a 100644 --- a/sound/soc/meson/aiu-fifo.c +++ b/sound/soc/meson/aiu-fifo.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,11 @@ int aiu_fifo_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_card *card = rtd->card->snd_card; struct aiu_fifo *fifo = dai->playback_dma_data; size_t size = fifo->pcm->buffer_bytes_max; + int ret; + + ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; snd_pcm_set_managed_buffer_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, card->dev, size, size); From ee907afb0c39a41ee74b862882cfe12820c74b98 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:04 +0100 Subject: [PATCH 156/361] ASoC: meson: aiu: Move AIU_I2S_MISC hold setting to aiu-fifo-i2s The out-of-tree vendor driver uses the following approach to set the AIU_I2S_MISC register: 1) write AIU_MEM_I2S_START_PTR and AIU_MEM_I2S_RD_PTR 2) configure AIU_I2S_MUTE_SWAP[15:0] 3) write AIU_MEM_I2S_END_PTR 4) set AIU_I2S_MISC[2] to 1 (documented as: "put I2S interface in hold mode") 5) set AIU_I2S_MISC[4] to 1 (depending on the driver revision it always stays at 1 while for older drivers this bit is unset in step 4) 6) set AIU_I2S_MISC[2] to 0 7) write AIU_MEM_I2S_MASKS 8) toggle AIU_MEM_I2S_CONTROL[0] 9) toggle AIU_MEM_I2S_BUF_CNTL[0] Move setting the AIU_I2S_MISC[2] bit to aiu_fifo_i2s_hw_params() so it resembles the flow in the vendor kernel more closely. While here also configure AIU_I2S_MISC[4] (documented as: "force each audio data to left or right according to the bit attached with the audio data") similar to how the vendor driver does this. This fixes the infamous and long-standing "machine gun noise" issue (a buffer underrun issue). Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Reported-by: Christian Hewitt Reported-by: Geraldo Nascimento Tested-by: Christian Hewitt Tested-by: Geraldo Nascimento Acked-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-encoder-i2s.c | 33 ------------------------------- sound/soc/meson/aiu-fifo-i2s.c | 19 ++++++++++++++++++ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/sound/soc/meson/aiu-encoder-i2s.c b/sound/soc/meson/aiu-encoder-i2s.c index 9322245521463..67729de41a73e 100644 --- a/sound/soc/meson/aiu-encoder-i2s.c +++ b/sound/soc/meson/aiu-encoder-i2s.c @@ -18,7 +18,6 @@ #define AIU_RST_SOFT_I2S_FAST BIT(0) #define AIU_I2S_DAC_CFG_MSB_FIRST BIT(2) -#define AIU_I2S_MISC_HOLD_EN BIT(2) #define AIU_CLK_CTRL_I2S_DIV_EN BIT(0) #define AIU_CLK_CTRL_I2S_DIV GENMASK(3, 2) #define AIU_CLK_CTRL_AOCLK_INVERT BIT(6) @@ -36,37 +35,6 @@ static void aiu_encoder_i2s_divider_enable(struct snd_soc_component *component, enable ? AIU_CLK_CTRL_I2S_DIV_EN : 0); } -static void aiu_encoder_i2s_hold(struct snd_soc_component *component, - bool enable) -{ - snd_soc_component_update_bits(component, AIU_I2S_MISC, - AIU_I2S_MISC_HOLD_EN, - enable ? AIU_I2S_MISC_HOLD_EN : 0); -} - -static int aiu_encoder_i2s_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) -{ - struct snd_soc_component *component = dai->component; - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - aiu_encoder_i2s_hold(component, false); - return 0; - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - aiu_encoder_i2s_hold(component, true); - return 0; - - default: - return -EINVAL; - } -} - static int aiu_encoder_i2s_setup_desc(struct snd_soc_component *component, struct snd_pcm_hw_params *params) { @@ -353,7 +321,6 @@ static void aiu_encoder_i2s_shutdown(struct snd_pcm_substream *substream, } const struct snd_soc_dai_ops aiu_encoder_i2s_dai_ops = { - .trigger = aiu_encoder_i2s_trigger, .hw_params = aiu_encoder_i2s_hw_params, .hw_free = aiu_encoder_i2s_hw_free, .set_fmt = aiu_encoder_i2s_set_fmt, diff --git a/sound/soc/meson/aiu-fifo-i2s.c b/sound/soc/meson/aiu-fifo-i2s.c index 2388a2d0b3a6c..57e6e7160d2f2 100644 --- a/sound/soc/meson/aiu-fifo-i2s.c +++ b/sound/soc/meson/aiu-fifo-i2s.c @@ -20,6 +20,8 @@ #define AIU_MEM_I2S_CONTROL_MODE_16BIT BIT(6) #define AIU_MEM_I2S_BUF_CNTL_INIT BIT(0) #define AIU_RST_SOFT_I2S_FAST BIT(0) +#define AIU_I2S_MISC_HOLD_EN BIT(2) +#define AIU_I2S_MISC_FORCE_LEFT_RIGHT BIT(4) #define AIU_FIFO_I2S_BLOCK 256 @@ -90,6 +92,10 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, unsigned int val; int ret; + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, + AIU_I2S_MISC_HOLD_EN); + ret = aiu_fifo_hw_params(substream, params, dai); if (ret) return ret; @@ -117,6 +123,19 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, AIU_MEM_I2S_MASKS, AIU_MEM_I2S_MASKS_IRQ_BLOCK, val); + /* + * Most (all?) supported SoCs have this bit set by default. The vendor + * driver however sets it manually (depending on the version either + * while un-setting AIU_I2S_MISC_HOLD_EN or right before that). Follow + * the same approach for consistency with the vendor driver. + */ + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_FORCE_LEFT_RIGHT, + AIU_I2S_MISC_FORCE_LEFT_RIGHT); + + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, 0); + return 0; } From 0013881c1145d36bf26165bb70fdd7560a5507a3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Thu, 4 Nov 2021 14:52:11 +0100 Subject: [PATCH 157/361] ice: Use div64_u64 instead of div_u64 in adjfine Change the division in ice_ptp_adjfine from div_u64 to div64_u64. div_u64 is used when the divisor is 32 bit but in this case incval is 64 bit and it caused incorrect calculations and incval adjustments. Fixes: 06c16d89d2cb ("ice: register 1588 PTP clock device object for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index bf7247c6f58e2..ad7cabe7932f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -705,7 +705,7 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) scaled_ppm = -scaled_ppm; } - while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) { + while ((u64)scaled_ppm > div64_u64(U64_MAX, incval)) { /* handle overflow by scaling down the scaled_ppm and * the divisor, losing some precision */ From 37e738b6fdb14529534dca441e0222313688fde3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Tue, 16 Nov 2021 13:07:14 +0100 Subject: [PATCH 158/361] ice: Don't put stale timestamps in the skb The driver has to check if it does not accidentally put the timestamp in the SKB before previous timestamp gets overwritten. Timestamp values in the PHY are read only and do not get cleared except at hardware reset or when a new timestamp value is captured. The cached_tstamp field is used to detect the case where a new timestamp has not yet been captured, ensuring that we avoid sending stale timestamp data to the stack. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 11 ++++------- drivers/net/ethernet/intel/ice/ice_ptp.h | 6 ++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ad7cabe7932f0..442b031b0edc0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1540,19 +1540,16 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work) if (err) continue; - /* Check if the timestamp is valid */ - if (!(raw_tstamp & ICE_PTP_TS_VALID)) + /* Check if the timestamp is invalid or stale */ + if (!(raw_tstamp & ICE_PTP_TS_VALID) || + raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* clear the timestamp register, so that it won't show valid - * again when re-used. - */ - ice_clear_phy_tstamp(hw, tx->quad, phy_idx); - /* The timestamp is valid, so we'll go ahead and clear this * index and then send the timestamp up to the stack. */ spin_lock(&tx->lock); + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index f71ad317d6c8f..53c15fc9d9961 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -55,15 +55,21 @@ struct ice_perout_channel { * struct ice_tx_tstamp - Tracking for a single Tx timestamp * @skb: pointer to the SKB for this timestamp request * @start: jiffies when the timestamp was first requested + * @cached_tstamp: last read timestamp * * This structure tracks a single timestamp request. The SKB pointer is * provided when initiating a request. The start time is used to ensure that * we discard old requests that were not fulfilled within a 2 second time * window. + * Timestamp values in the PHY are read only and do not get cleared except at + * hardware reset or when a new timestamp value is captured. The cached_tstamp + * field is used to detect the case where a new timestamp has not yet been + * captured, ensuring that we avoid sending stale timestamp data to the stack. */ struct ice_tx_tstamp { struct sk_buff *skb; unsigned long start; + u64 cached_tstamp; }; /** From 6c3118c32129b4197999a8928ba776bcabd0f5c4 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 10 Dec 2021 14:55:03 -0800 Subject: [PATCH 159/361] signal: Skip the altstack update when not needed == Background == Support for large, "dynamic" fpstates was recently merged. This included code to ensure that sigaltstacks are sufficiently sized for these large states. A new lock was added to remove races between enabling large features and setting up sigaltstacks. == Problem == The new lock (sigaltstack_lock()) is acquired in the sigreturn path before restoring the old sigaltstack. Unfortunately, contention on the new lock causes a measurable signal handling performance regression [1]. However, the common case is that no *changes* are made to the sigaltstack state at sigreturn. == Solution == do_sigaltstack() acquires sigaltstack_lock() and is used for both sys_sigaltstack() and restoring the sigaltstack in sys_sigreturn(). Check for changes to the sigaltstack before taking the lock. If no changes were made, return before acquiring the lock. This removes lock contention from the common-case sigreturn path. [1] https://lore.kernel.org/lkml/20211207012128.GA16074@xsang-OptiPlex-9020/ Fixes: 3aac3ebea08f ("x86/signal: Implement sigaltstack size validation") Reported-by: kernel test robot Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20211210225503.12734-1-chang.seok.bae@intel.com --- kernel/signal.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index a629b11bf3e0d..dfcee3888b00e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4185,6 +4185,15 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; + /* + * Return before taking any locks if no actual + * sigaltstack changes were requested. + */ + if (t->sas_ss_sp == (unsigned long)ss_sp && + t->sas_ss_size == ss_size && + t->sas_ss_flags == ss_flags) + return 0; + sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; From f3a8076eb28cae1553958c629aecec479394bbe2 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Sat, 4 Dec 2021 18:59:08 +0800 Subject: [PATCH 160/361] drm/amdgpu: correct register access for RLC_JUMP_TABLE_RESTORE should count on GC IP base address Signed-off-by: Le Ma Signed-off-by: Hawking Zhang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b305fd39874fe..edb3e3b08eed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3070,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } From 841933d5b8aa853abe68e63827f68f50fab37226 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 4 Dec 2021 19:22:12 +0800 Subject: [PATCH 161/361] drm/amdgpu: don't override default ECO_BITs setting Leave this bit as hardware default setting Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 2 -- 8 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 480e41847d7c0..ec4d5e15b766a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 14c1c1a297dd3..6e0ace2fbfab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index e80d1dc430790..b4eddf6e98a6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a99953833820e..b3bede1dc41da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f80a14a1b82dc..f5f7181f9af5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 25f8e93e5ec37..3718ff610ab28 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -267,7 +267,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a11d60ec63215..9e16da28505af 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c4ef822bbe8c5..ff49eeaf78824 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -189,8 +189,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, From dcd10d879a9d1d4e929d374c2f24aba8fac3252b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Dec 2021 12:13:53 -0600 Subject: [PATCH 162/361] drm/amd/pm: fix reading SMU FW version from amdgpu_firmware_info on YC This value does not get cached into adev->pm.fw_version during startup for smu13 like it does for other SMU like smu12. Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedfc..19a5d2c39c8d8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -198,6 +198,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) int smu_v13_0_check_fw_version(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; uint8_t smu_minor, smu_debug; @@ -210,6 +211,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu_major = (smu_version >> 16) & 0xffff; smu_minor = (smu_version >> 8) & 0xff; smu_debug = (smu_version >> 0) & 0xff; + if (smu->is_apu) + adev->pm.fw_version = smu_version; switch (smu->adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): From 7e4d2f30df3fb48f75ce9e96867d42bdddab83ac Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:03:59 -0800 Subject: [PATCH 163/361] drm/amd/display: Set exit_optimized_pwr_state for DCN31 [Why] SMU now respects the PHY refclk disable request from driver. This causes a hang during hotplug when PHY refclk was disabled because it's not being re-enabled and the transmitter control starts on dc_link_detect. [How] We normally would re-enable the clk with exit_optimized_pwr_state but this is only set on DCN21 and DCN301. Set it for dcn31 as well. This fixes DMCUB timeouts in the PHY. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Reviewed-by: Eric Yang Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 05335a8c3c2dc..4f6e639e93536 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; From 791255ca9fbe38042cfd55df5deb116dc11fef18 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:04:05 -0800 Subject: [PATCH 164/361] drm/amd/display: Reset DMCUB before HW init [Why] If the firmware wasn't reset by PSP or HW and is currently running then the firmware will hang or perform underfined behavior when we modify its firmware state underneath it. [How] Reset DMCUB before setting up cache windows and performing HW init. Reviewed-by: Aurabindo Jayamohanan Pillai Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 122dae1a1813b..e727f1dd2a9a7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1051,6 +1051,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + From 17c65d6fca844ee72a651944d8ce721e9040bf70 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 Dec 2021 14:38:38 +0800 Subject: [PATCH 165/361] drm/amdgpu: correct the wrong cached state for GMC on PICASSO Pair the operations did in GMC ->hw_init and ->hw_fini. That can help to maintain correct cached state for GMC and avoid unintention gate operation dropping due to wrong cached state. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1828 Signed-off-by: Evan Quan Acked-by: Guchun Chen Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 8 ++++---- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 7 ++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df5342..d84523cf5f759 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1808,6 +1808,14 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + /* + * Pair the operations did in gmc_v9_0_hw_init and thus maintain + * a correct cached state for GMC. Otherwise, the "gate" again + * operation on S3 resuming will fail due to wrong cached state. + */ + if (adev->mmhub.funcs->update_power_gating) + adev->mmhub.funcs->update_power_gating(adev, false); + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index b3bede1dc41da..1da2ec692057e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -301,10 +301,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return; - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - - } + if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) + amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GMC, + enable); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 8d796ed3b7d16..619f8d3052920 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1328,7 +1328,12 @@ static int pp_set_powergating_by_smu(void *handle, pp_dpm_powergate_vce(handle, gate); break; case AMD_IP_BLOCK_TYPE_GMC: - pp_dpm_powergate_mmhub(handle); + /* + * For now, this is only used on PICASSO. + * And only "gate" operation is supported. + */ + if (gate) + pp_dpm_powergate_mmhub(handle); break; case AMD_IP_BLOCK_TYPE_GFX: ret = pp_dpm_powergate_gfx(handle, gate); From aa464957f7e660abd554f2546a588f6533720e21 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 14 Dec 2021 15:25:54 +0800 Subject: [PATCH 166/361] drm/amd/pm: fix a potential gpu_metrics_table memory leak Memory is allocated for gpu_metrics_table in renoir_init_smc_tables(), but not freed in int smu_v12_0_fini_smc_tables(). Free it! Fixes: 95868b85764a ("drm/amd/powerplay: add Renoir support for gpu metrics export") Signed-off-by: Lang Yu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index d60b8c5e87157..43028f2cd28b5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -191,6 +191,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 4ad8181426df92976feee5fbc55236293d069b37 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 9 Dec 2021 22:06:55 +0800 Subject: [PATCH 167/361] RDMA/hns: Fix RNR retransmission issue for HIP08 Due to the discrete nature of the HIP08 timer unit, a requester might finish the timeout period sooner, in elapsed real time, than its responder does, even when both sides share the identical RNR timeout length included in the RNR Nak packet and the responder indeed starts the timing prior to the requester. Furthermore, if a 'providential' resend packet arrived before the responder's timeout period expired, the responder is certainly entitled to drop the packet silently in the light of IB protocol. To address this problem, our team made good use of certain hardware facts: 1) The timing resolution regards the transmission arrangements is 1 microsecond, e.g. if cq_period field is set to 3, it would be interpreted as 3 microsecond by hardware 2) A QPC field shall inform the hardware how many timing unit (ticks) constitutes a full microsecond, which, by default, is 1000 3) It takes 14ns for the processor to handle a packet in the buffer, so the RNR timeout length of 10ns would ensure our processing mechanism is disabled during the entire timeout period and the packet won't be dropped silently To achieve (3), we permanently set the QPC field mentioned in (2) to zero which nominally indicates every time tick is equivalent to a microsecond in wall-clock time; now, a RNR timeout period at face value of 10 would only last 10 ticks, which is 10ns in wall-clock time. It's worth noting that we adapt the driver by magnifying certain configuration parameters(cq_period, eq_period and ack_timeout)by 1000 given the user assumes the configuring timing unit to be microseconds. Also, this particular improvisation is only deployed on HIP08 since other hardware has already solved this issue. Fixes: cfc85f3e4b7f ("RDMA/hns: Add profile support for hip08 driver") Link: https://lore.kernel.org/r/20211209140655.49493-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 +++++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 8 +++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bbfa1332dedc0..eb0defa80d0dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1594,11 +1594,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + u32 clock_cycles_of_1us; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM, false); - hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + clock_cycles_of_1us = HNS_ROCE_1NS_CFG; + else + clock_cycles_of_1us = HNS_ROCE_1US_CFG; + + hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us); hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -4802,6 +4808,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, return ret; } +static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) +{ +#define QP_ACK_TIMEOUT_MAX_HIP08 20 +#define QP_ACK_TIMEOUT_OFFSET 10 +#define QP_ACK_TIMEOUT_MAX 31 + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 20.\n"); + return false; + } + *timeout += QP_ACK_TIMEOUT_OFFSET; + } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 31.\n"); + return false; + } + } + + return true; +} + static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4811,6 +4841,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret = 0; + u8 timeout; if (attr_mask & IB_QP_AV) { ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, @@ -4820,12 +4851,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - hr_reg_write(context, QPC_AT, attr->timeout); + timeout = attr->timeout; + if (check_qp_timeout_cfg_range(hr_dev, &timeout)) { + hr_reg_write(context, QPC_AT, timeout); hr_reg_clear(qpc_mask, QPC_AT); - } else { - ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4882,7 +4911,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) { - hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer); + hr_reg_write(context, QPC_MIN_RNR_TIME, + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? + HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer); hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME); } @@ -5499,6 +5530,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count); hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, + "cq_period(%u) reached the upper limit, adjusted to 65.\n", + cq_period); + cq_period = HNS_ROCE_MAX_CQ_PERIOD; + } + cq_period *= HNS_ROCE_CLOCK_ADJUST; + } hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); @@ -5894,6 +5935,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n", + eq->eq_period); + eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD; + } + eq->eq_period *= HNS_ROCE_CLOCK_ADJUST; + } + hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period); hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER); hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 4d904d5e82be4..35c61da7ba156 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1444,6 +1444,14 @@ struct hns_roce_dip { struct list_head node; /* all dips are on a list */ }; +/* only for RNR timeout issue of HIP08 */ +#define HNS_ROCE_CLOCK_ADJUST 1000 +#define HNS_ROCE_MAX_CQ_PERIOD 65 +#define HNS_ROCE_MAX_EQ_PERIOD 65 +#define HNS_ROCE_RNR_TIMER_10NS 1 +#define HNS_ROCE_1US_CFG 999 +#define HNS_ROCE_1NS_CFG 0 + #define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0 From bee90911e0138c76ee67458ac0d58b38a3190f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 18:52:38 +0100 Subject: [PATCH 168/361] IB/qib: Fix memory leak in qib_user_sdma_queue_pkts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrong goto label was used for the error case and missed cleanup of the pkt allocation. Fixes: d39bf40e55e6 ("IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields") Link: https://lore.kernel.org/r/20211208175238.29983-1-jose.exposito89@gmail.com Addresses-Coverity-ID: 1493352 ("Resource leak") Signed-off-by: José Expósito Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index ac11943a5ddb0..bf2f30d67949d 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, &addrlimit) || addrlimit > type_max(typeof(pkt->addrlimit))) { ret = -EINVAL; - goto free_pbc; + goto free_pkt; } pkt->addrlimit = addrlimit; From 12d3bbdd6bd2780b71cc466f3fbc6eb7d43bbc2a Mon Sep 17 00:00:00 2001 From: Jiacheng Shi Date: Fri, 10 Dec 2021 01:42:34 -0800 Subject: [PATCH 169/361] RDMA/hns: Replace kfree() with kvfree() Variables allocated by kvmalloc_array() should not be freed by kfree. Because they may be allocated by vmalloc. So we replace kfree() with kvfree() here. Fixes: 6fd610c5733d ("RDMA/hns: Support 0 hop addressing for SRQ buffer") Link: https://lore.kernel.org/r/20211210094234.5829-1-billsjc@sjtu.edu.cn Signed-off-by: Jiacheng Shi Acked-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6eee9deadd122..e64ef6903fb4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -259,7 +259,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static void free_srq_wrid(struct hns_roce_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); srq->wrid = NULL; } From 404cd9a22150f24acf23a8df2ad0c094ba379f57 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:01 -0800 Subject: [PATCH 170/361] mptcp: remove tcp ulp setsockopt support TCP_ULP setsockopt cannot be used for mptcp because its already used internally to plumb subflow (tcp) sockets to the mptcp layer. syzbot managed to trigger a crash for mptcp connections that are in fallback mode: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] CPU: 1 PID: 1083 Comm: syz-executor.3 Not tainted 5.16.0-rc2-syzkaller #0 RIP: 0010:tls_build_proto net/tls/tls_main.c:776 [inline] [..] __tcp_set_ulp net/ipv4/tcp_ulp.c:139 [inline] tcp_set_ulp+0x428/0x4c0 net/ipv4/tcp_ulp.c:160 do_tcp_setsockopt+0x455/0x37c0 net/ipv4/tcp.c:3391 mptcp_setsockopt+0x1b47/0x2400 net/mptcp/sockopt.c:638 Remove support for TCP_ULP setsockopt. Fixes: d9e4c1291810 ("mptcp: only admit explicitly supported sockopt") Reported-by: syzbot+1fd9b69cde42967d1add@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0f1e661c2032b..f8efd478ac97f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -525,7 +525,6 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NODELAY: case TCP_THIN_LINEAR_TIMEOUTS: case TCP_CONGESTION: - case TCP_ULP: case TCP_CORK: case TCP_KEEPIDLE: case TCP_KEEPINTVL: From d6692b3b97bdc165d150f4c1505751a323a80717 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:02 -0800 Subject: [PATCH 171/361] mptcp: clear 'kern' flag from fallback sockets The mptcp ULP extension relies on sk->sk_sock_kern being set correctly: It prevents setsockopt(fd, IPPROTO_TCP, TCP_ULP, "mptcp", 6); from working for plain tcp sockets (any userspace-exposed socket). But in case of fallback, accept() can return a plain tcp sk. In such case, sk is still tagged as 'kernel' and setsockopt will work. This will crash the kernel, The subflow extension has a NULL ctx->conn mptcp socket: BUG: KASAN: null-ptr-deref in subflow_data_ready+0x181/0x2b0 Call Trace: tcp_data_ready+0xf8/0x370 [..] Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c82a76d2d0bfe..6dc1ff07994c2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2879,7 +2879,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, */ if (WARN_ON_ONCE(!new_mptcp_sock)) { tcp_sk(newsk)->is_mptcp = 0; - return newsk; + goto out; } /* acquire the 2nd reference for the owning socket */ @@ -2891,6 +2891,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } +out: + newsk->sk_kern_sock = kern; return newsk; } From 3d79e3756ca90f7a6087b77b62c1d9c0801e0820 Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Tue, 14 Dec 2021 15:16:03 -0800 Subject: [PATCH 172/361] mptcp: fix deadlock in __mptcp_push_pending() __mptcp_push_pending() may call mptcp_flush_join_list() with subflow socket lock held. If such call hits mptcp_sockopt_sync_all() then subsequently __mptcp_sockopt_sync() could try to lock the subflow socket for itself, causing a deadlock. sysrq: Show Blocked State task:ss-server state:D stack: 0 pid: 938 ppid: 1 flags:0x00000000 Call Trace: __schedule+0x2d6/0x10c0 ? __mod_memcg_state+0x4d/0x70 ? csum_partial+0xd/0x20 ? _raw_spin_lock_irqsave+0x26/0x50 schedule+0x4e/0xc0 __lock_sock+0x69/0x90 ? do_wait_intr_irq+0xa0/0xa0 __lock_sock_fast+0x35/0x50 mptcp_sockopt_sync_all+0x38/0xc0 __mptcp_push_pending+0x105/0x200 mptcp_sendmsg+0x466/0x490 sock_sendmsg+0x57/0x60 __sys_sendto+0xf0/0x160 ? do_wait_intr_irq+0xa0/0xa0 ? fpregs_restore_userregs+0x12/0xd0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9ba546c2d0 RSP: 002b:00007ffdc3b762d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f9ba56c8060 RCX: 00007f9ba546c2d0 RDX: 000000000000077a RSI: 0000000000e5e180 RDI: 0000000000000234 RBP: 0000000000cc57f0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9ba56c8060 R13: 0000000000b6ba60 R14: 0000000000cc7840 R15: 41d8685b1d7901b8 Fix the issue by using __mptcp_flush_join_list() instead of plain mptcp_flush_join_list() inside __mptcp_push_pending(), as suggested by Florian. The sockopt sync will be deferred to the workqueue. Fixes: 1b3e7ede1365 ("mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/244 Suggested-by: Florian Westphal Reviewed-by: Florian Westphal Signed-off-by: Maxim Galaganov Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6dc1ff07994c2..54613f5b75217 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1524,7 +1524,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) int ret = 0; prev_ssk = ssk; - mptcp_flush_join_list(msk); + __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); /* First check. If the ssk has changed since From 6813b1928758ce64fabbb8ef157f994b7c2235fa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 14 Dec 2021 15:16:04 -0800 Subject: [PATCH 173/361] mptcp: add missing documented NL params 'loc_id' and 'rem_id' are set in all events linked to subflows but those were missing in the events description in the comments. Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index c8cc46f80a161..f106a3941cdf3 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -136,19 +136,21 @@ struct mptcp_info { * MPTCP_EVENT_REMOVED: token, rem_id * An address has been lost by the peer. * - * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, - * if_idx [, error] + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, + * saddr4 | saddr6, daddr4 | daddr6, sport, + * dport, backup, if_idx [, error] * A new subflow has been established. 'error' should not be set. * - * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] + * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] * A subflow has been closed. An error (copy of sk_err) could be set if an * error has been detected for this subflow. * - * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] - * The priority of a subflow has changed. 'error' should not be set. + * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] + * The priority of a subflow has changed. 'error' should not be set. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, From cb2ac2912a9ca7d3d26291c511939a41361d2d83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Dec 2021 07:03:24 -0700 Subject: [PATCH 174/361] block: reduce kblockd_mod_delayed_work_on() CPU consumption Dexuan reports that he's seeing spikes of very heavy CPU utilization when running 24 disks and using the 'none' scheduler. This happens off the sched restart path, because SCSI requires the queue to be restarted async, and hence we're hammering on mod_delayed_work_on() to ensure that the work item gets run appropriately. Avoid hammering on the timer and just use queue_work_on() if no delay has been specified. Reported-and-tested-by: Dexuan Cui Link: https://lore.kernel.org/linux-block/BYAPR21MB1270C598ED214C0490F47400BF719@BYAPR21MB1270.namprd21.prod.outlook.com/ Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 1378d084c770f..c1833f95cb972 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,6 +1484,8 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { + if (!delay) + return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From aa97f6cdb7e92909e17c8ca63e622fcb81d57a57 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Fri, 12 Nov 2021 13:36:29 +0800 Subject: [PATCH 175/361] bcache: fix NULL pointer reference in cached_dev_detach_finish Commit 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") tries to fix calc_cached_dev_sectors when bcache device detaches, but now we have: cached_dev_detach_finish ... bcache_device_detach(&dc->disk); ... closure_put(&d->c->caching); d->c = NULL; [*explicitly set dc->disk.c to NULL*] list_move(&dc->list, &uncached_devices); calc_cached_dev_sectors(dc->disk.c); [*passing a NULL pointer*] ... Upper codeflows shows how bug happens, this patch fix the problem by caching dc->disk.c beforehand, and cache_set won't be freed under us because c->caching closure at least holds a reference count and closure callback __cache_set_unregister only being called by bch_cache_set_stop which using closure_queue(&c->caching), that means c->caching closure callback for destroying cache_set won't be trigger by previous closure_put(&d->c->caching). So at this stage(while cached_dev_detach_finish is calling) it's safe to access cache_set dc->disk.c. Fixes: 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") Signed-off-by: Lin Feng Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211112053629.3437-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 86b9e355c5837..140f35dc0c457 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1139,6 +1139,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); + struct cache_set *c = dc->disk.c; BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(refcount_read(&dc->count)); @@ -1156,7 +1157,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); - calc_cached_dev_sectors(dc->disk.c); + calc_cached_dev_sectors(c); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); From 7d3baf0afa3aa9102d6a521a8e4c41888bb79882 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 12:51:56 +0000 Subject: [PATCH 176/361] bpf: Fix kernel address leakage in atomic fetch The change in commit 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") around check_mem_access() handling is buggy since this would allow for unprivileged users to leak kernel pointers. For example, an atomic fetch/and with -1 on a stack destination which holds a spilled pointer will migrate the spilled register type into a scalar, which can then be exported out of the program (since scalar != pointer) by dumping it into a map value. The original implementation of XADD was preventing this situation by using a double call to check_mem_access() one with BPF_READ and a subsequent one with BPF_WRITE, in both cases passing -1 as a placeholder value instead of register as per XADD semantics since it didn't contain a value fetch. The BPF_READ also included a check in check_stack_read_fixed_off() which rejects the program if the stack slot is of __is_pointer_value() if dst_regno < 0. The latter is to distinguish whether we're dealing with a regular stack spill/ fill or some arithmetical operation which is disallowed on non-scalars, see also 6e7e63cbb023 ("bpf: Forbid XADD on spilled pointers for unprivileged users") for more context on check_mem_access() and its handling of placeholder value -1. One minimally intrusive option to fix the leak is for the BPF_FETCH case to initially check the BPF_READ case via check_mem_access() with -1 as register, followed by the actual load case with non-negative load_reg to propagate stack bounds to registers. Fixes: 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") Reported-by: Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2f1ed34cfe9b..53d39db3b0fa6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4584,13 +4584,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i load_reg = -1; } - /* check whether we can read the memory */ + /* Check whether we can read the memory, with second call for fetch + * case to simulate the register fill. + */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, true); + BPF_SIZE(insn->code), BPF_READ, -1, true); + if (!err && load_reg >= 0) + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, load_reg, + true); if (err) return err; - /* check whether we can write into the same memory */ + /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); if (err) From 180486b430f4e22cc00a478163d942804baae4b5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 10:07:04 +0000 Subject: [PATCH 177/361] bpf, selftests: Add test case for atomic fetch on spilled pointer Test whether unprivileged would be able to leak the spilled pointer either by exporting the returned value from the atomic{32,64} operation or by reading and exporting the value from the stack after the atomic operation took place. Note that for unprivileged, the below atomic cmpxchg test case named "Dest pointer in r0 - succeed" is failing. The reason is that in the dst memory location (r10 -8) there is the spilled register r10: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (b7) r1 = 0 3: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=fp 3: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r1) 4: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=mmmmmmmm 4: (79) r1 = *(u64 *)(r0 -8) 5: R0_w=fp0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 5: (b7) r0 = 0 6: R0_w=invP0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 6: (95) exit However, allowing this case for unprivileged is a bit useless given an update with a new pointer will fail anyway: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r10) R10 leaks addr into mem Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 23 +++++ .../selftests/bpf/verifier/atomic_fetch.c | 94 +++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index c22dc83a41fdc..0ffc69f602af1 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -156,4 +156,27 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "Dest pointer in r0 - succeed, check 2", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R5 leaks addr into mem", }, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c index 3bc9ff7a860b7..5bf03fb4fa2b6 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_fetch.c +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -1,3 +1,97 @@ +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, #define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ { \ "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ From a82fe085f344ef20b452cd5f481010ff96b5c4cd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 11:02:02 +0000 Subject: [PATCH 178/361] bpf: Fix kernel address leakage in atomic cmpxchg's r0 aux reg The implementation of BPF_CMPXCHG on a high level has the following parameters: .-[old-val] .-[new-val] BPF_R0 = cmpxchg{32,64}(DST_REG + insn->off, BPF_R0, SRC_REG) `-[mem-loc] `-[old-val] Given a BPF insn can only have two registers (dst, src), the R0 is fixed and used as an auxilliary register for input (old value) as well as output (returning old value from memory location). While the verifier performs a number of safety checks, it misses to reject unprivileged programs where R0 contains a pointer as old value. Through brute-forcing it takes about ~16sec on my machine to leak a kernel pointer with BPF_CMPXCHG. The PoC is basically probing for kernel addresses by storing the guessed address into the map slot as a scalar, and using the map value pointer as R0 while SRC_REG has a canary value to detect a matching address. Fix it by checking R0 for pointers, and reject if that's the case for unprivileged programs. Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg") Reported-by: Ryota Shiga (Flatt Security) Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 53d39db3b0fa6..2d48159b58bd3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4547,9 +4547,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (insn->imm == BPF_CMPXCHG) { /* Check comparison of R0 with memory location */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + const u32 aux_reg = BPF_REG_0; + + err = check_reg_arg(env, aux_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, aux_reg)) { + verbose(env, "R%d leaks addr into mem\n", aux_reg); + return -EACCES; + } } if (is_pointer_value(env, insn->src_reg)) { From e523102cb719cbad1673b6aa2a4d5c1fa6f13799 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Dec 2021 22:25:23 +0000 Subject: [PATCH 179/361] bpf, selftests: Update test case for atomic cmpxchg on r0 with pointer Fix up unprivileged test case results for 'Dest pointer in r0' verifier tests given they now need to reject R0 containing a pointer value, and add a couple of new related ones with 32bit cmpxchg as well. root@foo:~/bpf/tools/testing/selftests/bpf# ./test_verifier #0/u invalid and of negative number OK #0/p invalid and of negative number OK [...] #1268/p XDP pkt read, pkt_meta' <= pkt_data, bad access 1 OK #1269/p XDP pkt read, pkt_meta' <= pkt_data, bad access 2 OK #1270/p XDP pkt read, pkt_data <= pkt_meta', good access OK #1271/p XDP pkt read, pkt_data <= pkt_meta', bad access 1 OK #1272/p XDP pkt read, pkt_data <= pkt_meta', bad access 2 OK Summary: 1900 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 67 ++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 0ffc69f602af1..b39665f33524f 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -138,6 +138,8 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed", @@ -157,7 +159,7 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "leaking pointer from stack off -8", + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed, check 2", @@ -178,5 +180,66 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R5 leaks addr into mem", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 3", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid size of register fill", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 4", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r10 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R10 partial copy of pointer", +}, +{ + "Dest pointer in r0 - succeed, check 5", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 invalid mem access", + .errstr_unpriv = "R10 partial copy of pointer", }, From f7ac570d0f026cf5475d4cc4d8040bd947980b3a Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 15 Dec 2021 00:41:54 +0800 Subject: [PATCH 180/361] ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook There is a HP ProBook which using ALC236 codec and need the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and micmute LED work. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20211214164156.49711-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc41f3e8ddc3c..e59ff75eea751 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8706,6 +8706,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 58e138d62476fc5f889252dcf73848beeaa54789 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:55 +0100 Subject: [PATCH 181/361] Revert "x86/boot: Mark prepare_command_line() __init" This reverts commit c0f2077baa4113f38f008b8e912b9fb3ff8d43df. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-2-bp@alien8.de --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6a190c7f4d71b..c410be738ae78 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char * __init prepare_command_line(void) +static char *prepare_command_line(void) { #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE From fbe6183998546f8896ee0b620ece86deff5a2fd1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:56 +0100 Subject: [PATCH 182/361] Revert "x86/boot: Pull up cmdline preparation and early param parsing" This reverts commit 8d48bf8206f77aa8687f0e241e901e5197e52423. It turned out to be a bad idea as it broke supplying mem= cmdline parameters due to parse_memopt() requiring preparatory work like setting up the e820 table in e820__memory_setup() in order to be able to exclude the range specified by mem=. Pulling that up would've broken Xen PV again, see threads at https://lkml.kernel.org/r/20210920120421.29276-1-jgross@suse.com due to xen_memory_setup() needing the first reservations in early_reserve_memory() - kernel and initrd - to have happened already. This could be fixed again by having Xen do those reservations itself... Long story short, revert this and do a simpler fix in a later patch. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-3-bp@alien8.de --- arch/x86/kernel/setup.c | 66 +++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c410be738ae78..49b596db5631e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,28 +742,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char *prepare_command_line(void) -{ -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - - parse_early_param(); - - return command_line; -} - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -852,23 +830,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - /* - * x86_configure_nx() is called before parse_early_param() (called by - * prepare_command_line()) to detect whether hardware doesn't support - * NX (so that the early EHCI debug console setup can safely call - * set_fixmap()). It may then be called again from within noexec_setup() - * during parsing early parameters to honor the respective command line - * option. - */ - x86_configure_nx(); - - /* - * This parses early params and it needs to run before - * early_reserve_memory() because latter relies on such settings - * supplied as early params. - */ - *cmdline_p = prepare_command_line(); - /* * Do some memory reservations *before* memory is added to memblock, so * memblock allocations won't overwrite it. @@ -902,6 +863,33 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + /* + * x86_configure_nx() is called before parse_early_param() to detect + * whether hardware doesn't support NX (so that the early EHCI debug + * console setup can safely call set_fixmap()). It may then be called + * again from within noexec_setup() during parsing early parameters + * to honor the respective command line option. + */ + x86_configure_nx(); + + parse_early_param(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 04e57a2d952bbd34bc45744e72be3eecdc344294 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 14 Dec 2021 10:45:26 +0100 Subject: [PATCH 183/361] tomoyo: Check exceeded quota early in tomoyo_domain_quota_is_ok(). If tomoyo is used in a testing/fuzzing environment in learning mode, for lots of domains the quota will be exceeded and stay exceeded for prolonged periods of time. In such cases it's pointless (and slow) to walk the whole acl list again and again just to rediscover that the quota is exceeded. We already have the TOMOYO_DIF_QUOTA_WARNED flag that notes the overflow condition. Check it early to avoid the slowdown. [penguin-kernel] This patch causes a user visible change that the learning mode will not be automatically resumed after the quota is increased. To resume the learning mode, administrator will need to explicitly clear TOMOYO_DIF_QUOTA_WARNED flag after increasing the quota. But I think that this change is generally preferable, for administrator likely wants to optimize the acl list for that domain before increasing the quota, or that domain likely hits the quota again. Therefore, don't try to care to clear TOMOYO_DIF_QUOTA_WARNED flag automatically when the quota for that domain changed. Signed-off-by: Dmitry Vyukov Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 1da2e3722b126..af8cd2af3466d 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1051,6 +1051,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) return false; if (!domain) return true; + if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) + return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; @@ -1096,14 +1098,12 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { - domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; - /* r->granted = false; */ - tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING - pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", - domain->domainname->name); + pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", + domain->domainname->name); #endif - } return false; } From f702e1107601230eec707739038a89018ea3468d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:13:55 +0900 Subject: [PATCH 184/361] tomoyo: use hwight16() in tomoyo_domain_quota_is_ok() hwight16() is much faster. While we are at it, no need to include "perm =" part into data_race() macro, for perm is a local variable that cannot be accessed by other threads. Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index af8cd2af3466d..6799b1122c9d8 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1056,7 +1056,6 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; - u8 i; if (ptr->is_deleted) continue; @@ -1067,23 +1066,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head) + perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: - data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: - data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: - data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; @@ -1091,9 +1090,7 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) default: perm = 1; } - for (i = 0; i < 16; i++) - if (perm & (1 << i)) - count++; + count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) From 2f5b3514c33fecad4003ce0f22ca9691492d310b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 13 Dec 2021 12:27:57 +0100 Subject: [PATCH 185/361] x86/boot: Move EFI range reservation after cmdline parsing The memory reservation in arch/x86/platform/efi/efi.c depends on at least two command line parameters. Put it back later in the boot process and move efi_memblock_x86_reserve_range() out of early_memory_reserve(). An attempt to fix this was done in 8d48bf8206f7 ("x86/boot: Pull up cmdline preparation and early param parsing") but that caused other troubles so it got reverted. The bug this is addressing is: Dan reports that Anjaneya Chagam can no longer use the efi=nosoftreserve kernel command line parameter to suppress "soft reservation" behavior. This is due to the fact that the following call-chain happens at boot: early_reserve_memory |-> efi_memblock_x86_reserve_range |-> efi_fake_memmap_early which does if (!efi_soft_reserve_enabled()) return; and that would have set EFI_MEM_NO_SOFT_RESERVE after having parsed "nosoftreserve". However, parse_early_param() gets called *after* it, leading to the boot cmdline not being taken into account. See also https://lore.kernel.org/r/e8dd8993c38702ee6dd73b3c11f158617e665607.camel@intel.com [ bp: Turn into a proper patch. ] Signed-off-by: Mike Rapoport Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-4-bp@alien8.de --- arch/x86/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 49b596db5631e..e04f5e6eb33f4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -713,9 +713,6 @@ static void __init early_reserve_memory(void) early_reserve_initrd(); - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); - memblock_x86_reserve_range_setup_data(); reserve_ibft_region(); @@ -890,6 +887,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 651740a502411793327e2f0741104749c4eedcd1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Dec 2021 14:22:33 -0500 Subject: [PATCH 186/361] btrfs: check WRITE_ERR when trying to read an extent buffer Filipe reported a hang when we have errors on btrfs. This turned out to be a side-effect of my fix c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") which made it so we clear EXTENT_BUFFER_UPTODATE on an eb when we fail to write it out. Below is a paste of Filipe's analysis he got from using drgn to debug the hang """ btree readahead code calls read_extent_buffer_pages(), sets ->io_pages to a value while writeback of all pages has not yet completed: --> writeback for the first 3 pages finishes, we clear EXTENT_BUFFER_UPTODATE from eb on the first page when we get an error. --> at this point eb->io_pages is 1 and we cleared Uptodate bit from the first 3 pages --> read_extent_buffer_pages() does not see EXTENT_BUFFER_UPTODATE() so it continues, it's able to lock the pages since we obviously don't hold the pages locked during writeback --> read_extent_buffer_pages() then computes 'num_reads' as 3, and sets eb->io_pages to 3, since only the first page does not have Uptodate bit set at this point --> writeback for the remaining page completes, we ended decrementing eb->io_pages by 1, resulting in eb->io_pages == 2, and therefore never calling end_extent_buffer_writeback(), so EXTENT_BUFFER_WRITEBACK remains in the eb's flags --> of course, when the read bio completes, it doesn't and shouldn't call end_extent_buffer_writeback() --> we should clear EXTENT_BUFFER_UPTODATE only after all pages of the eb finished writeback? or maybe make the read pages code wait for writeback of all pages of the eb to complete before checking which pages need to be read, touch ->io_pages, submit read bio, etc writeback bit never cleared means we can hang when aborting a transaction, at: btrfs_cleanup_one_transaction() btrfs_destroy_marked_extents() wait_on_extent_buffer_writeback() """ This is a problem because our writes are not synchronized with reads in any way. We clear the UPTODATE flag and then we can easily come in and try to read the EB while we're still waiting on other bio's to complete. We have two options here, we could lock all the pages, and then check to see if eb->io_pages != 0 to know if we've already got an outstanding write on the eb. Or we can simply check to see if we have WRITE_ERR set on this extent buffer. We set this bit _before_ we clear UPTODATE, so if the read gets triggered because we aren't UPTODATE because of a write error we're guaranteed to have WRITE_ERR set, and in this case we can simply return -EIO. This will fix the reported hang. Reported-by: Filipe Manana Fixes: c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3258b6f01e85b..9234d96a7fd5c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6611,6 +6611,14 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; + /* + * We could have had EXTENT_BUFFER_UPTODATE cleared by the write + * operation, which could potentially still be in flight. In this case + * we simply want to return an error. + */ + if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))) + return -EIO; + if (eb->fs_info->sectorsize < PAGE_SIZE) return read_extent_buffer_subpage(eb, wait, mirror_num); From 7a1636089acfee7562fe79aff7d1b4c57869896d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:12 +0000 Subject: [PATCH 187/361] btrfs: fix invalid delayed ref after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the new root's root item into the root tree, we are freeing the metadata extent we reserved for the new root to prevent a metadata extent leak, as we don't abort the transaction at that point (since there is nothing at that point that is irreversible). However we allocated the metadata extent for the new root which we are creating for the new subvolume, so its delayed reference refers to the ID of this new root. But when we free the metadata extent we pass the root of the subvolume where the new subvolume is located to btrfs_free_tree_block() - this is incorrect because this will generate a delayed reference that refers to the ID of the parent subvolume's root, and not to ID of the new root. This results in a failure when running delayed references that leads to a transaction abort and a trace like the following: [3868.738042] RIP: 0010:__btrfs_free_extent+0x709/0x950 [btrfs] [3868.739857] Code: 68 0f 85 e6 fb ff (...) [3868.742963] RSP: 0018:ffffb0e9045cf910 EFLAGS: 00010246 [3868.743908] RAX: 00000000fffffffe RBX: 00000000fffffffe RCX: 0000000000000002 [3868.745312] RDX: 00000000fffffffe RSI: 0000000000000002 RDI: ffff90b0cd793b88 [3868.746643] RBP: 000000000e5d8000 R08: 0000000000000000 R09: ffff90b0cd793b88 [3868.747979] R10: 0000000000000002 R11: 00014ded97944d68 R12: 0000000000000000 [3868.749373] R13: ffff90b09afe4a28 R14: 0000000000000000 R15: ffff90b0cd793b88 [3868.750725] FS: 00007f281c4a8b80(0000) GS:ffff90b3ada00000(0000) knlGS:0000000000000000 [3868.752275] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3868.753515] CR2: 00007f281c6a5000 CR3: 0000000108a42006 CR4: 0000000000370ee0 [3868.754869] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [3868.756228] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [3868.757803] Call Trace: [3868.758281] [3868.758655] ? btrfs_merge_delayed_refs+0x178/0x1c0 [btrfs] [3868.759827] __btrfs_run_delayed_refs+0x2b1/0x1250 [btrfs] [3868.761047] btrfs_run_delayed_refs+0x86/0x210 [btrfs] [3868.762069] ? lock_acquired+0x19f/0x420 [3868.762829] btrfs_commit_transaction+0x69/0xb20 [btrfs] [3868.763860] ? _raw_spin_unlock+0x29/0x40 [3868.764614] ? btrfs_block_rsv_release+0x1c2/0x1e0 [btrfs] [3868.765870] create_subvol+0x1d8/0x9a0 [btrfs] [3868.766766] btrfs_mksubvol+0x447/0x4c0 [btrfs] [3868.767669] ? preempt_count_add+0x49/0xa0 [3868.768444] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [3868.769639] ? _copy_from_user+0x66/0xa0 [3868.770391] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [3868.771495] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [3868.772364] ? __slab_free+0x10a/0x360 [3868.773198] ? rcu_read_lock_sched_held+0x12/0x60 [3868.774121] ? lock_release+0x223/0x4a0 [3868.774863] ? lock_acquired+0x19f/0x420 [3868.775634] ? rcu_read_lock_sched_held+0x12/0x60 [3868.776530] ? trace_hardirqs_on+0x1b/0xe0 [3868.777373] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [3868.778280] ? kmem_cache_free+0x321/0x3c0 [3868.779011] ? __x64_sys_ioctl+0x83/0xb0 [3868.779718] __x64_sys_ioctl+0x83/0xb0 [3868.780387] do_syscall_64+0x3b/0xc0 [3868.781059] entry_SYSCALL_64_after_hwframe+0x44/0xae [3868.781953] RIP: 0033:0x7f281c59e957 [3868.782585] Code: 3c 1c 48 f7 d8 4c (...) [3868.785867] RSP: 002b:00007ffe1f83e2b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [3868.787198] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281c59e957 [3868.788450] RDX: 00007ffe1f83e2c0 RSI: 0000000050009418 RDI: 0000000000000003 [3868.789748] RBP: 00007ffe1f83f300 R08: 0000000000000000 R09: 00007ffe1f83fe36 [3868.791214] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003 [3868.792468] R13: 0000000000000003 R14: 00007ffe1f83e2c0 R15: 00000000000003cc [3868.793765] [3868.794037] irq event stamp: 0 [3868.794548] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [3868.795670] hardirqs last disabled at (0): [] copy_process+0x934/0x2040 [3868.797086] softirqs last enabled at (0): [] copy_process+0x934/0x2040 [3868.798309] softirqs last disabled at (0): [<0000000000000000>] 0x0 [3868.799284] ---[ end trace be24c7002fe27747 ]--- [3868.799928] BTRFS info (device dm-0): leaf 241188864 gen 1268 total ptrs 214 free space 469 owner 2 [3868.801133] BTRFS info (device dm-0): refs 2 lock_owner 225627 current 225627 [3868.802056] item 0 key (237436928 169 0) itemoff 16250 itemsize 33 [3868.802863] extent refs 1 gen 1265 flags 2 [3868.803447] ref#0: tree block backref root 1610 (...) [3869.064354] item 114 key (241008640 169 0) itemoff 12488 itemsize 33 [3869.065421] extent refs 1 gen 1268 flags 2 [3869.066115] ref#0: tree block backref root 1689 (...) [3869.403834] BTRFS error (device dm-0): unable to find ref byte nr 241008640 parent 0 root 1622 owner 0 offset 0 [3869.405641] BTRFS: error (device dm-0) in __btrfs_free_extent:3076: errno=-2 No such entry [3869.407138] BTRFS: error (device dm-0) in btrfs_run_delayed_refs:2159: errno=-2 No such entry Fix this by passing the new subvolume's root ID to btrfs_free_tree_block(). This requires changing the root argument of btrfs_free_tree_block() from struct btrfs_root * to a u64, since at this point during the subvolume creation we have not yet created the struct btrfs_root for the new subvolume, and btrfs_free_tree_block() only needs a root ID and nothing else from a struct btrfs_root. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 17 +++++++++-------- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/extent-tree.c | 13 +++++++------ fs/btrfs/free-space-tree.c | 4 ++-- fs/btrfs/ioctl.c | 9 +++++---- fs/btrfs/qgroup.c | 3 ++- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 74c8e18f3720d..64599625c7d7e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -462,8 +462,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); rcu_assign_pointer(root->node, cow); - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -484,8 +484,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } } - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -926,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -985,7 +985,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), right, + 0, 1); free_extent_buffer_stale(right); right = NULL; } else { @@ -1030,7 +1031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { @@ -4031,7 +4032,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); atomic_inc(&leaf->refs); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7553e9dc5f938..5fe5eccb3c874 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2257,6 +2257,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root) return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64); @@ -2719,7 +2724,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, u64 empty_size, enum btrfs_lock_nesting nest); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc4895e6a62cd..25ef6e3fd3069 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3275,20 +3275,20 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, } void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_ref generic_ref = { 0 }; int ret; btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid, 0, false); + root_id, 0, false); - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ @@ -3298,7 +3298,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; bool must_pin = false; - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); if (!ret) { btrfs_redirty_list_add(trans->transaction, buf); @@ -5472,7 +5472,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, goto owner_mismatch; } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent, + wc->refs[level] == 1); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a33bca94d133e..3abec44c62559 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_tree_lock(free_space_root->node); btrfs_clean_tree_block(free_space_root->node); btrfs_tree_unlock(free_space_root->node); - btrfs_free_tree_block(trans, free_space_root, free_space_root->node, - 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), + free_space_root->node, 0, 1); btrfs_put_root(free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b85d98df66be..a7533416370ab 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -617,11 +617,12 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the - * extent tree without backreferences). Also no need to have - * the tree block locked since it is not in any tree at this - * point, so no other task can find it and use it. + * extent tree with a backreference for a root that does not + * exists). Also no need to have the tree block locked since it + * is not in any tree at this point, so no other task can find + * it and use it. */ - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index db680f5be745a..6c037f1252b77 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1219,7 +1219,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_tree_lock(quota_root->node); btrfs_clean_tree_block(quota_root->node); btrfs_tree_unlock(quota_root->node); - btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(quota_root), + quota_root->node, 0, 1); btrfs_put_root(quota_root); From 212a58fda9b9077e0efc20200a4feb76afacfd95 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:13 +0000 Subject: [PATCH 188/361] btrfs: fix warning when freeing leaf after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the root item for the new subvolume into the root tree, we can trigger the following warning: [78961.741046] WARNING: CPU: 0 PID: 4079814 at fs/btrfs/extent-tree.c:3357 btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.743344] Modules linked in: [78961.749440] dm_snapshot dm_thin_pool (...) [78961.773648] CPU: 0 PID: 4079814 Comm: fsstress Not tainted 5.16.0-rc4-btrfs-next-108 #1 [78961.775198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [78961.777266] RIP: 0010:btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.778398] Code: 17 00 48 85 (...) [78961.781067] RSP: 0018:ffffaa4001657b28 EFLAGS: 00010202 [78961.781877] RAX: 0000000000000213 RBX: ffff897f8a796910 RCX: 0000000000000000 [78961.782780] RDX: 0000000000000000 RSI: 0000000011004000 RDI: 00000000ffffffff [78961.783764] RBP: ffff8981f490e800 R08: 0000000000000001 R09: 0000000000000000 [78961.784740] R10: 0000000000000000 R11: 0000000000000001 R12: ffff897fc963fcc8 [78961.785665] R13: 0000000000000001 R14: ffff898063548000 R15: ffff898063548000 [78961.786620] FS: 00007f31283c6b80(0000) GS:ffff8982ace00000(0000) knlGS:0000000000000000 [78961.787717] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [78961.788598] CR2: 00007f31285c3000 CR3: 000000023fcc8003 CR4: 0000000000370ef0 [78961.789568] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [78961.790585] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [78961.791684] Call Trace: [78961.792082] [78961.792359] create_subvol+0x5d1/0x9a0 [btrfs] [78961.793054] btrfs_mksubvol+0x447/0x4c0 [btrfs] [78961.794009] ? preempt_count_add+0x49/0xa0 [78961.794705] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [78961.795712] ? _copy_from_user+0x66/0xa0 [78961.796382] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [78961.797392] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [78961.798172] ? __slab_free+0x10a/0x360 [78961.798820] ? rcu_read_lock_sched_held+0x12/0x60 [78961.799664] ? lock_release+0x223/0x4a0 [78961.800321] ? lock_acquired+0x19f/0x420 [78961.800992] ? rcu_read_lock_sched_held+0x12/0x60 [78961.801796] ? trace_hardirqs_on+0x1b/0xe0 [78961.802495] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [78961.803358] ? kmem_cache_free+0x321/0x3c0 [78961.804071] ? __x64_sys_ioctl+0x83/0xb0 [78961.804711] __x64_sys_ioctl+0x83/0xb0 [78961.805348] do_syscall_64+0x3b/0xc0 [78961.805969] entry_SYSCALL_64_after_hwframe+0x44/0xae [78961.806830] RIP: 0033:0x7f31284bc957 [78961.807517] Code: 3c 1c 48 f7 d8 (...) This is because we are calling btrfs_free_tree_block() on an extent buffer that is dirty. Fix that by cleaning the extent buffer, with btrfs_clean_tree_block(), before freeing it. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a7533416370ab..8a442b59eee05 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -618,10 +618,11 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the * extent tree with a backreference for a root that does not - * exists). Also no need to have the tree block locked since it - * is not in any tree at this point, so no other task can find - * it and use it. + * exists). */ + btrfs_tree_lock(leaf); + btrfs_clean_tree_block(leaf); + btrfs_tree_unlock(leaf); btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; From 4989d4a0aed3fb30f5b48787a689d7090de6f86d Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 15 Dec 2021 19:38:43 +0900 Subject: [PATCH 189/361] btrfs: fix missing blkdev_put() call in btrfs_scan_one_device() The function btrfs_scan_one_device() calls blkdev_get_by_path() and blkdev_put() to get and release its target block device. However, when btrfs_sb_log_location_bdev() fails, blkdev_put() is not called and the block device is left without clean up. This triggered failure of fstests generic/085. Fix the failure path of btrfs_sb_log_location_bdev() to call blkdev_put(). Fixes: 12659251ca5df ("btrfs: implement log-structured superblock for ZONED mode") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Nikolay Borisov Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc80f2a97a0ba..b4da58fd0e1a8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1370,8 +1370,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, bytenr_orig = btrfs_sb_offset(0); ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); - if (ret) - return ERR_PTR(ret); + if (ret) { + device = ERR_PTR(ret); + goto error_bdev_put; + } disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); if (IS_ERR(disk_super)) { From a7083763619f7485ccdade160deb81737cf2732f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 10 Dec 2021 09:55:29 -0700 Subject: [PATCH 190/361] soc/tegra: fuse: Fix bitwise vs. logical OR warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new warning in clang points out two instances where boolean expressions are being used with a bitwise OR instead of logical OR: drivers/soc/tegra/fuse/speedo-tegra20.c:72:9: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] reg = tegra_fuse_read_spare(i) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/soc/tegra/fuse/speedo-tegra20.c:72:9: note: cast one or both operands to int to silence this warning drivers/soc/tegra/fuse/speedo-tegra20.c:87:9: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] reg = tegra_fuse_read_spare(i) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/soc/tegra/fuse/speedo-tegra20.c:87:9: note: cast one or both operands to int to silence this warning 2 warnings generated. The motivation for the warning is that logical operations short circuit while bitwise operations do not. In this instance, tegra_fuse_read_spare() is not semantically returning a boolean, it is returning a bit value. Use u32 for its return type so that it can be used with either bitwise or boolean operators without any warnings. Fixes: 25cd5a391478 ("ARM: tegra: Add speedo-based process identification") Link: https://github.com/ClangBuiltLinux/linux/issues/1488 Suggested-by: Michał Mirosław Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Thierry Reding --- drivers/soc/tegra/fuse/fuse-tegra.c | 2 +- drivers/soc/tegra/fuse/fuse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index f2151815db585..e714ed3b61bc3 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -320,7 +320,7 @@ static struct platform_driver tegra_fuse_driver = { }; builtin_platform_driver(tegra_fuse_driver); -bool __init tegra_fuse_read_spare(unsigned int spare) +u32 __init tegra_fuse_read_spare(unsigned int spare) { unsigned int offset = fuse->soc->info->spare + spare * 4; diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index de58feba04350..ecff0c08e9595 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -65,7 +65,7 @@ struct tegra_fuse { void tegra_init_revision(void); void tegra_init_apbmisc(void); -bool __init tegra_fuse_read_spare(unsigned int spare); +u32 __init tegra_fuse_read_spare(unsigned int spare); u32 __init tegra_fuse_read_early(unsigned int offset); u8 tegra_get_major_rev(void); From f08adf5add9a071160c68bb2a61d697f39ab0758 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Dec 2021 19:46:21 +0100 Subject: [PATCH 191/361] USB: gadget: bRequestType is a bitfield, not a enum Szymon rightly pointed out that the previous check for the endpoint direction in bRequestType was not looking at only the bit involved, but rather the whole value. Normally this is ok, but for some request types, bits other than bit 8 could be set and the check for the endpoint length could not stall correctly. Fix that up by only checking the single bit. Fixes: 153a2d7e3350 ("USB: gadget: detect too-big endpoint 0 requests") Cc: Felipe Balbi Reported-by: Szymon Heidrich Link: https://lore.kernel.org/r/20211214184621.385828-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 6 +++--- drivers/usb/gadget/legacy/dbgp.c | 6 +++--- drivers/usb/gadget/legacy/inode.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 284eea9f6e4d8..3789c329183ca 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1680,14 +1680,14 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u8 endp; if (w_length > USB_COMP_EP0_BUFSIZ) { - if (ctrl->bRequestType == USB_DIR_OUT) { - goto done; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); w_length = USB_COMP_EP0_BUFSIZ; + } else { + goto done; } } diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index 355bc7dab9d5f..6bcbad3825802 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -346,14 +346,14 @@ static int dbgp_setup(struct usb_gadget *gadget, u16 len = 0; if (length > DBGP_REQ_LEN) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return err; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(DBGP_REQ_LEN); length = DBGP_REQ_LEN; + } else { + return err; } } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 63150e3889efb..3b58f4fc0a806 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1334,14 +1334,14 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_length = le16_to_cpu(ctrl->wLength); if (w_length > RBUF_SIZE) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return value; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(RBUF_SIZE); w_length = RBUF_SIZE; + } else { + return value; } } From fac6bf87c55f7f0733efb0375565fb6a50cf2caf Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 7 Dec 2021 13:45:10 +0100 Subject: [PATCH 192/361] usb: dwc2: fix STM ID/VBUS detection startup delay in dwc2_driver_probe When activate_stm_id_vb_detection is enabled, ID and Vbus detection relies on sensing comparators. This detection needs time to stabilize. A delay was already applied in dwc2_resume() when reactivating the detection, but it wasn't done in dwc2_probe(). This patch adds delay after enabling STM ID/VBUS detection. Then, ID state is good when initializing gadget and host, and avoid to get a wrong Connector ID Status Change interrupt. Fixes: a415083a11cc ("usb: dwc2: add support for STM32MP15 SoCs USB OTG HS and FS") Cc: stable Acked-by: Minas Harutyunyan Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211207124510.268841-1-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index c8f18f3ba9e35..c331a5128c2c0 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -575,6 +575,9 @@ static int dwc2_driver_probe(struct platform_device *dev) ggpio |= GGPIO_STM32_OTG_GCCFG_IDEN; ggpio |= GGPIO_STM32_OTG_GCCFG_VBDEN; dwc2_writel(hsotg, ggpio, GGPIO); + + /* ID/VBUS detection startup time */ + usleep_range(5000, 7000); } retval = dwc2_drd_init(hsotg); From f4b3ee3c85551d2d343a3ba159304066523f730f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Dec 2021 11:46:07 -0500 Subject: [PATCH 193/361] audit: improve robustness of the audit queue handling If the audit daemon were ever to get stuck in a stopped state the kernel's kauditd_thread() could get blocked attempting to send audit records to the userspace audit daemon. With the kernel thread blocked it is possible that the audit queue could grow unbounded as certain audit record generating events must be exempt from the queue limits else the system enter a deadlock state. This patch resolves this problem by lowering the kernel thread's socket sending timeout from MAX_SCHEDULE_TIMEOUT to HZ/10 and tweaks the kauditd_send_queue() function to better manage the various audit queues when connection problems occur between the kernel and the audit daemon. With this patch, the backlog may temporarily grow beyond the defined limits when the audit daemon is stopped and the system is under heavy audit pressure, but kauditd_thread() will continue to make progress and drain the queues as it would for other connection problems. For example, with the audit daemon put into a stopped state and the system configured to audit every syscall it was still possible to shutdown the system without a kernel panic, deadlock, etc.; granted, the system was slow to shutdown but that is to be expected given the extreme pressure of recording every syscall. The timeout value of HZ/10 was chosen primarily through experimentation and this developer's "gut feeling". There is likely no one perfect value, but as this scenario is limited in scope (root privileges would be needed to send SIGSTOP to the audit daemon), it is likely not worth exposing this as a tunable at present. This can always be done at a later date if it proves necessary. Cc: stable@vger.kernel.org Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") Reported-by: Gaosheng Cui Tested-by: Gaosheng Cui Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 121d37e700a62..4cebadb5f30db 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -718,7 +718,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, { int rc = 0; struct sk_buff *skb; - static unsigned int failed = 0; + unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ @@ -735,32 +735,30 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, continue; } +retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } -out: return (rc >= 0 ? 0 : rc); } @@ -1609,7 +1607,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10; return 0; } From 584af82154f56e6b2740160fcc84a2966d969e15 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Tue, 31 Aug 2021 13:16:35 +0200 Subject: [PATCH 194/361] igb: Fix removal of unicast MAC filters of VFs Move checking condition of VF MAC filter before clearing or adding MAC filter to VF to prevent potential blackout caused by removal of necessary and working VF's MAC filter. Fixes: 1b8b062a99dc ("igb: add VF trust infrastructure") Signed-off-by: Karen Sornek Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 28 +++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd54d3ef890bc..b597b8bfb9103 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7648,6 +7648,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ @@ -7661,20 +7675,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && - !vf_data->trusted) { - dev_warn(&pdev->dev, - "VF %d requested MAC filter but is administratively denied\n", - vf); - return -EINVAL; - } - if (!is_valid_ether_addr(addr)) { - dev_warn(&pdev->dev, - "VF %d attempted to set invalid MAC filter\n", - vf); - return -EINVAL; - } - /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); From b6d335a60dc624c0d279333b22c737faa765b028 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sat, 13 Nov 2021 11:42:34 +0800 Subject: [PATCH 195/361] igbvf: fix double free in `igbvf_probe` In `igbvf_probe`, if register_netdev() fails, the program will go to label err_hw_init, and then to label err_ioremap. In free_netdev() which is just below label err_ioremap, there is `list_for_each_entry_safe` and `netif_napi_del` which aims to delete all entries in `dev->napi_list`. The program has added an entry `adapter->rx_ring->napi` which is added by `netif_napi_add` in igbvf_alloc_queues(). However, adapter->rx_ring has been freed below label err_hw_init. So this a UAF. In terms of how to patch the problem, we can refer to igbvf_remove() and delete the entry before `adapter->rx_ring`. The KASAN logs are as follows: [ 35.126075] BUG: KASAN: use-after-free in free_netdev+0x1fd/0x450 [ 35.127170] Read of size 8 at addr ffff88810126d990 by task modprobe/366 [ 35.128360] [ 35.128643] CPU: 1 PID: 366 Comm: modprobe Not tainted 5.15.0-rc2+ #14 [ 35.129789] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 35.131749] Call Trace: [ 35.132199] dump_stack_lvl+0x59/0x7b [ 35.132865] print_address_description+0x7c/0x3b0 [ 35.133707] ? free_netdev+0x1fd/0x450 [ 35.134378] __kasan_report+0x160/0x1c0 [ 35.135063] ? free_netdev+0x1fd/0x450 [ 35.135738] kasan_report+0x4b/0x70 [ 35.136367] free_netdev+0x1fd/0x450 [ 35.137006] igbvf_probe+0x121d/0x1a10 [igbvf] [ 35.137808] ? igbvf_vlan_rx_add_vid+0x100/0x100 [igbvf] [ 35.138751] local_pci_probe+0x13c/0x1f0 [ 35.139461] pci_device_probe+0x37e/0x6c0 [ 35.165526] [ 35.165806] Allocated by task 366: [ 35.166414] ____kasan_kmalloc+0xc4/0xf0 [ 35.167117] foo_kmem_cache_alloc_trace+0x3c/0x50 [igbvf] [ 35.168078] igbvf_probe+0x9c5/0x1a10 [igbvf] [ 35.168866] local_pci_probe+0x13c/0x1f0 [ 35.169565] pci_device_probe+0x37e/0x6c0 [ 35.179713] [ 35.179993] Freed by task 366: [ 35.180539] kasan_set_track+0x4c/0x80 [ 35.181211] kasan_set_free_info+0x1f/0x40 [ 35.181942] ____kasan_slab_free+0x103/0x140 [ 35.182703] kfree+0xe3/0x250 [ 35.183239] igbvf_probe+0x1173/0x1a10 [igbvf] [ 35.184040] local_pci_probe+0x13c/0x1f0 Fixes: d4e0fe01a38a0 (igbvf: add new driver to support 82576 virtual functions) Reported-by: Zheyu Ma Signed-off-by: Letu Ren Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 74ccd622251a2..4d988da68394d 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2859,6 +2859,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_hw_init: + netif_napi_del(&adapter->rx_ring->napi); kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: From 0182d1f3fa640888a2ed7e3f6df2fdb10adee7c8 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 2 Nov 2021 09:20:06 +0200 Subject: [PATCH 196/361] igc: Fix typo in i225 LTR functions The LTR maximum value was incorrectly written using the scale from the LTR minimum value. This would cause incorrect values to be sent, in cases where the initial calculation lead to different min/max scales. Fixes: 707abf069548 ("igc: Add initial LTR support") Suggested-by: Dima Ruinskiy Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_i225.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b2ef9fde97b38..b6807e16eea93 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -636,7 +636,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) ltrv = rd32(IGC_LTRMAXV); if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) { ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max | - (scale_min << IGC_LTRMAXV_SCALE_SHIFT); + (scale_max << IGC_LTRMAXV_SCALE_SHIFT); wr32(IGC_LTRMAXV, ltrv); } } From 271225fd57c2f1e0b3f8826df51be6c634affefe Mon Sep 17 00:00:00 2001 From: Robert Schlabbach Date: Tue, 26 Oct 2021 02:24:48 +0200 Subject: [PATCH 197/361] ixgbe: Document how to enable NBASE-T support Commit a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") introduced suppression of the advertisement of NBASE-T speeds by default, according to Todd Fujinaka to accommodate customers with network switches which could not cope with advertised NBASE-T speeds, as posted in the E1000-devel mailing list: https://sourceforge.net/p/e1000/mailman/message/37106269/ However, the suppression was not documented at all, nor was how to enable NBASE-T support. Properly document the NBASE-T suppression and how to enable NBASE-T support. Fixes: a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") Reported-by: Robert Schlabbach Signed-off-by: Robert Schlabbach Signed-off-by: Tony Nguyen --- .../device_drivers/ethernet/intel/ixgbe.rst | 16 ++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst index f1d5233e5e510..0a233b17c664e 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst @@ -440,6 +440,22 @@ NOTE: For 82599-based network connections, if you are enabling jumbo frames in a virtual function (VF), jumbo frames must first be enabled in the physical function (PF). The VF MTU setting cannot be larger than the PF MTU. +NBASE-T Support +--------------- +The ixgbe driver supports NBASE-T on some devices. However, the advertisement +of NBASE-T speeds is suppressed by default, to accommodate broken network +switches which cannot cope with advertised NBASE-T speeds. Use the ethtool +command to enable advertising NBASE-T speeds on devices which support it:: + + ethtool -s eth? advertise 0x1800000001028 + +On Linux systems with INTERFACES(5), this can be specified as a pre-up command +in /etc/network/interfaces so that the interface is always brought up with +NBASE-T support, e.g.:: + + iface eth? inet dhcp + pre-up ethtool -s eth? advertise 0x1800000001028 || true + Generic Receive Offload, aka GRO -------------------------------- The driver supports the in-kernel software implementation of GRO. GRO has diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0f9f022260d70..45e2ec4d264d9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5531,6 +5531,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) if (!speed && hw->mac.ops.get_link_capabilities) { ret = hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + /* remove NBASE-T speeds from default autonegotiation + * to accommodate broken network switches in the field + * which cannot cope with advertised NBASE-T speeds + */ speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | IXGBE_LINK_SPEED_2_5GB_FULL); } From 1cef171abd39102dcc862c6bfbf7f954f4f1f66f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 15 Dec 2021 12:31:51 -0500 Subject: [PATCH 198/361] dm integrity: fix data corruption due to improper use of bvec_kmap_local Commit 25058d1c725c ("dm integrity: use bvec_kmap_local in __journal_read_write") didn't account for __journal_read_write() later adding the biovec's bv_offset. As such using bvec_kmap_local() caused the start of the biovec to be skipped. Trivial test that illustrates data corruption: # integritysetup format /dev/pmem0 # integritysetup open /dev/pmem0 integrityroot # mkfs.xfs /dev/mapper/integrityroot ... bad magic number bad magic number Metadata corruption detected at xfs_sb block 0x0/0x1000 libxfs_writebufr: write verifer failed on xfs_sb bno 0x0/0x1000 releasing dirty buffer (bulk) to free list! Fix this by using kmap_local_page() instead of bvec_kmap_local() in __journal_read_write(). Fixes: 25058d1c725c ("dm integrity: use bvec_kmap_local in __journal_read_write") Reported-by: Tony Asleson Reviewed-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 6319deccbe09e..7af242de3202e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1963,7 +1963,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, n_sectors -= bv.bv_len >> SECTOR_SHIFT; bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); retry_kmap: - mem = bvec_kmap_local(&bv); + mem = kmap_local_page(bv.bv_page); if (likely(dio->op == REQ_OP_WRITE)) flush_dcache_page(bv.bv_page); From bf0a375055bd1afbbf02a0ef45f7655da7b71317 Mon Sep 17 00:00:00 2001 From: Cyril Novikov Date: Mon, 1 Nov 2021 18:39:36 -0700 Subject: [PATCH 199/361] ixgbe: set X550 MDIO speed before talking to PHY The MDIO bus speed must be initialized before talking to the PHY the first time in order to avoid talking to it using a speed that the PHY doesn't support. This fixes HW initialization error -17 (IXGBE_ERR_PHY_ADDR_INVALID) on Denverton CPUs (a.k.a. the Atom C3000 family) on ports with a 10Gb network plugged in. On those devices, HLREG0[MDCSPD] resets to 1, which combined with the 10Gb network results in a 24MHz MDIO speed, which is apparently too fast for the connected PHY. PHY register reads over MDIO bus return garbage, leading to initialization failure. Reproduced with Linux kernel 4.19 and 5.15-rc7. Can be reproduced using the following setup: * Use an Atom C3000 family system with at least one X552 LAN on the SoC * Disable PXE or other BIOS network initialization if possible (the interface must not be initialized before Linux boots) * Connect a live 10Gb Ethernet cable to an X550 port * Power cycle (not reset, doesn't always work) the system and boot Linux * Observe: ixgbe interfaces w/ 10GbE cables plugged in fail with error -17 Fixes: e84db7272798 ("ixgbe: Introduce function to control MDIO speed") Signed-off-by: Cyril Novikov Reviewed-by: Andrew Lunn Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 9724ffb165189..e4b50c7781ffa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); + /* set MDIO speed before talking to the PHY in case it's the 1st time */ + ixgbe_set_mdio_speed(hw); + /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || From 1ee33b1ca2b8dabfcc17198ffd049a6b55674a86 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:52:40 +0900 Subject: [PATCH 200/361] tty: n_hdlc: make n_hdlc_tty_wakeup() asynchronous syzbot is reporting that an unprivileged user who logged in from tty console can crash the system using a reproducer shown below [1], for n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames(). ---------- #include #include int main(int argc, char *argv[]) { const int disc = 0xd; ioctl(1, TIOCSETD, &disc); while (1) { ioctl(1, TCXONC, 0); write(1, "", 1); ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */ } } ---------- Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like net/nfc/nci/uart.c does. Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1] Reported-by: syzbot Cc: stable Analyzed-by: Fabio M. De Francesco Suggested-by: Linus Torvalds Confirmed-by: Jiri Slaby Reviewed-by: Fabio M. De Francesco Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/40de8b7e-a3be-4486-4e33-1b1d1da452f8@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 7e0884ecc74f5..23ba1fc99df8b 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -140,6 +140,8 @@ struct n_hdlc { struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; struct n_hdlc_buf_list rx_free_buf_list; + struct work_struct write_work; + struct tty_struct *tty_for_write_work; }; /* @@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); /* Local functions */ static struct n_hdlc *n_hdlc_alloc(void); +static void n_hdlc_tty_write_work(struct work_struct *work); /* max frame size for memory allocations */ static int maxframe = 4096; @@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty) wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); + cancel_work_sync(&n_hdlc->write_work); + n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->rx_buf_list); @@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty) return -ENFILE; } + INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work); + n_hdlc->tty_for_write_work = tty; tty->disc_data = n_hdlc; tty->receive_room = 65536; @@ -334,6 +341,20 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) goto check_again; } /* end of n_hdlc_send_frames() */ +/** + * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup + * @work: pointer to work_struct + * + * Called when low level device driver can accept more send data. + */ +static void n_hdlc_tty_write_work(struct work_struct *work) +{ + struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work); + struct tty_struct *tty = n_hdlc->tty_for_write_work; + + n_hdlc_send_frames(n_hdlc, tty); +} /* end of n_hdlc_tty_write_work() */ + /** * n_hdlc_tty_wakeup - Callback for transmit wakeup * @tty: pointer to associated tty instance data @@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; - n_hdlc_send_frames(n_hdlc, tty); + schedule_work(&n_hdlc->write_work); } /* end of n_hdlc_tty_wakeup() */ /** From 6c33ff728812aa18792afffaf2c9873b898e7512 Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Wed, 15 Dec 2021 15:58:35 +0800 Subject: [PATCH 201/361] serial: 8250_fintek: Fix garbled text for console Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") introduced support to use high baudrate with Fintek SuperIO UARTs. It'll change clocksources when the UART probed. But when user add kernel parameter "console=ttyS0,115200 console=tty0" to make the UART as console output, the console will output garbled text after the following kernel message. [ 3.681188] Serial: 8250/16550 driver, 32 ports, IRQ sharing enabled The issue is occurs in following step: probe_setup_port() -> fintek_8250_goto_highspeed() It change clocksource from 115200 to 921600 with wrong time, it should change clocksource in set_termios() not in probed. The following 3 patches are implemented change clocksource in fintek_8250_set_termios(). Commit 58178914ae5b ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81216H") Commit 195638b6d44f ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81866") Commit 423d9118c624 ("serial: 8250_fintek: Add F81966 Support") Due to the high baud rate had implemented above 3 patches and the patch Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") is bugged, So this patch will remove it. Fixes: fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") Signed-off-by: Ji-Ze Hong (Peter Hong) Link: https://lore.kernel.org/r/20211215075835.2072-1-hpeter+linux_kernel@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 31c9e83ea3cb2..251f0018ae8ca 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -290,25 +290,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata) } } -static void fintek_8250_goto_highspeed(struct uart_8250_port *uart, - struct fintek_8250 *pdata) -{ - sio_write_reg(pdata, LDN, pdata->index); - - switch (pdata->pid) { - case CHIP_ID_F81966: - case CHIP_ID_F81866: /* set uart clock for high speed serial mode */ - sio_write_mask_reg(pdata, F81866_UART_CLK, - F81866_UART_CLK_MASK, - F81866_UART_CLK_14_769MHZ); - - uart->port.uartclk = 921600 * 16; - break; - default: /* leave clock speed untouched */ - break; - } -} - static void fintek_8250_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) @@ -430,7 +411,6 @@ static int probe_setup_port(struct fintek_8250 *pdata, fintek_8250_set_irq_mode(pdata, level_mode); fintek_8250_set_max_fifo(pdata); - fintek_8250_goto_highspeed(uart, pdata); fintek_8250_exit_key(addr[i]); From f886d4fbb7c97b8f5f447c92d2dab99c841803c0 Mon Sep 17 00:00:00 2001 From: Nehal Bakulchandra Shah Date: Wed, 15 Dec 2021 15:02:16 +0530 Subject: [PATCH 202/361] usb: xhci: Extend support for runtime power management for AMD's Yellow carp. AMD's Yellow Carp platform has few more XHCI controllers, enable the runtime power management support for the same. Signed-off-by: Nehal Bakulchandra Shah Cc: stable Link: https://lore.kernel.org/r/20211215093216.1839065-1-Nehal-Bakulchandra.shah@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 92adf61078644..3af0178832311 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,6 +71,8 @@ #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 @@ -330,7 +332,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6)) + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) From 0ad3bd562bb91853b9f42bda145b5db6255aee90 Mon Sep 17 00:00:00 2001 From: Jimmy Wang Date: Tue, 14 Dec 2021 09:26:50 +0800 Subject: [PATCH 203/361] USB: NO_LPM quirk Lenovo USB-C to Ethernet Adapher(RTL8153-04) This device doesn't work well with LPM, losing connectivity intermittently. Disable LPM to resolve the issue. Reviewed-by: Signed-off-by: Jimmy Wang Cc: stable Link: https://lore.kernel.org/r/20211214012652.4898-1-wangjm221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 019351c0b52cf..d3c14b5ed4a1f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ + { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, From 4c4e162d9cf38528c4f13df09d5755cbc06f6c77 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 14 Dec 2021 05:55:27 +0100 Subject: [PATCH 204/361] usb: cdnsp: Fix lack of spin_lock_irqsave/spin_lock_restore Patch puts content of cdnsp_gadget_pullup function inside spin_lock_irqsave and spin_lock_restore section. This construction is required here to keep the data consistency, otherwise some data can be changed e.g. from interrupt context. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reported-by: Ken (Jian) He cc: Signed-off-by: Pawel Laszczak -- Changelog: v2: - added disable_irq/enable_irq as sugester by Peter Chen drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20211214045527.26823-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 27df0c6978978..e85bf768c66da 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1541,15 +1541,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on) { struct cdnsp_device *pdev = gadget_to_cdnsp(gadget); struct cdns *cdns = dev_get_drvdata(pdev->dev); + unsigned long flags; trace_cdnsp_pullup(is_on); + /* + * Disable events handling while controller is being + * enabled/disabled. + */ + disable_irq(cdns->dev_irq); + spin_lock_irqsave(&pdev->lock, flags); + if (!is_on) { cdnsp_reset_device(pdev); cdns_clear_vbus(cdns); } else { cdns_set_vbus(cdns); } + + spin_unlock_irqrestore(&pdev->lock, flags); + enable_irq(cdns->dev_irq); + return 0; } From 0f7d9b31ce7abdbb29bf018131ac920c9f698518 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Dec 2021 05:45:44 -0800 Subject: [PATCH 205/361] netfilter: nf_tables: fix use-after-free in nft_set_catchall_destroy() We need to use list_for_each_entry_safe() iterator because we can not access @catchall after kfree_rcu() call. syzbot reported: BUG: KASAN: use-after-free in nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4486 [inline] BUG: KASAN: use-after-free in nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] BUG: KASAN: use-after-free in nft_set_destroy+0x3fd/0x4f0 net/netfilter/nf_tables_api.c:4493 Read of size 8 at addr ffff8880716e5b80 by task syz-executor.3/8871 CPU: 1 PID: 8871 Comm: syz-executor.3 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x2ed mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4486 [inline] nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] nft_set_destroy+0x3fd/0x4f0 net/netfilter/nf_tables_api.c:4493 __nft_release_table+0x79f/0xcd0 net/netfilter/nf_tables_api.c:9626 nft_rcv_nl_event+0x4f8/0x670 net/netfilter/nf_tables_api.c:9688 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 blocking_notifier_call_chain kernel/notifier.c:318 [inline] blocking_notifier_call_chain+0x67/0x90 kernel/notifier.c:306 netlink_release+0xcb6/0x1dd0 net/netlink/af_netlink.c:788 __sock_release+0xcd/0x280 net/socket.c:649 sock_close+0x18/0x20 net/socket.c:1314 __fput+0x286/0x9f0 fs/file_table.c:280 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:175 [inline] exit_to_user_mode_prepare+0x27e/0x290 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f75fbf28adb Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd8da7ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f75fbf28adb RDX: 00007f75fc08e828 RSI: ffffffffffffffff RDI: 0000000000000003 RBP: 00007f75fc08a960 R08: 0000000000000000 R09: 00007f75fc08e830 R10: 00007ffd8da7ed10 R11: 0000000000000293 R12: 00000000002067c3 R13: 00007ffd8da7ed10 R14: 00007f75fc088f60 R15: 0000000000000032 Allocated by task 8886: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] ____kasan_kmalloc mm/kasan/common.c:513 [inline] ____kasan_kmalloc mm/kasan/common.c:472 [inline] __kasan_kmalloc+0xa6/0xd0 mm/kasan/common.c:522 kasan_kmalloc include/linux/kasan.h:269 [inline] kmem_cache_alloc_trace+0x1ea/0x4a0 mm/slab.c:3575 kmalloc include/linux/slab.h:590 [inline] nft_setelem_catchall_insert net/netfilter/nf_tables_api.c:5544 [inline] nft_setelem_insert net/netfilter/nf_tables_api.c:5562 [inline] nft_add_set_elem+0x232e/0x2f40 net/netfilter/nf_tables_api.c:5936 nf_tables_newsetelem+0x6ff/0xbb0 net/netfilter/nf_tables_api.c:6032 nfnetlink_rcv_batch+0x1710/0x25f0 net/netfilter/nfnetlink.c:513 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xdf0 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 15335: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xd1/0x110 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] __cache_free mm/slab.c:3445 [inline] kmem_cache_free_bulk+0x67/0x1e0 mm/slab.c:3766 kfree_bulk include/linux/slab.h:446 [inline] kfree_rcu_work+0x51c/0xa10 kernel/rcu/tree.c:3273 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xb5/0xe0 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x990 kernel/rcu/tree.c:3550 nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4489 [inline] nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] nft_set_destroy+0x34a/0x4f0 net/netfilter/nf_tables_api.c:4493 __nft_release_table+0x79f/0xcd0 net/netfilter/nf_tables_api.c:9626 nft_rcv_nl_event+0x4f8/0x670 net/netfilter/nf_tables_api.c:9688 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 blocking_notifier_call_chain kernel/notifier.c:318 [inline] blocking_notifier_call_chain+0x67/0x90 kernel/notifier.c:306 netlink_release+0xcb6/0x1dd0 net/netlink/af_netlink.c:788 __sock_release+0xcd/0x280 net/socket.c:649 sock_close+0x18/0x20 net/socket.c:1314 __fput+0x286/0x9f0 fs/file_table.c:280 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:175 [inline] exit_to_user_mode_prepare+0x27e/0x290 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff8880716e5b80 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 0 bytes inside of 64-byte region [ffff8880716e5b80, ffff8880716e5bc0) The buggy address belongs to the page: page:ffffea0001c5b940 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8880716e5c00 pfn:0x716e5 flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffffea0000911848 ffffea00007c4d48 ffff888010c40200 raw: ffff8880716e5c00 ffff8880716e5000 000000010000001e 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x242040(__GFP_IO|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE), pid 3638, ts 211086074437, free_ts 211031029429 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 __alloc_pages_node include/linux/gfp.h:570 [inline] kmem_getpages mm/slab.c:1377 [inline] cache_grow_begin+0x75/0x470 mm/slab.c:2593 cache_alloc_refill+0x27f/0x380 mm/slab.c:2965 ____cache_alloc mm/slab.c:3048 [inline] ____cache_alloc mm/slab.c:3031 [inline] __do_cache_alloc mm/slab.c:3275 [inline] slab_alloc mm/slab.c:3316 [inline] __do_kmalloc mm/slab.c:3700 [inline] __kmalloc+0x3b3/0x4d0 mm/slab.c:3711 kmalloc include/linux/slab.h:595 [inline] kzalloc include/linux/slab.h:724 [inline] tomoyo_get_name+0x234/0x480 security/tomoyo/memory.c:173 tomoyo_parse_name_union+0xbc/0x160 security/tomoyo/util.c:260 tomoyo_update_path_number_acl security/tomoyo/file.c:687 [inline] tomoyo_write_file+0x629/0x7f0 security/tomoyo/file.c:1034 tomoyo_write_domain2+0x116/0x1d0 security/tomoyo/common.c:1152 tomoyo_add_entry security/tomoyo/common.c:2042 [inline] tomoyo_supervisor+0xbc7/0xf00 security/tomoyo/common.c:2103 tomoyo_audit_path_number_log security/tomoyo/file.c:235 [inline] tomoyo_path_number_perm+0x419/0x590 security/tomoyo/file.c:734 security_file_ioctl+0x50/0xb0 security/security.c:1541 __do_sys_ioctl fs/ioctl.c:868 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0xb3/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 slab_destroy mm/slab.c:1627 [inline] slabs_destroy+0x89/0xc0 mm/slab.c:1647 cache_flusharray mm/slab.c:3418 [inline] ___cache_free+0x4cc/0x610 mm/slab.c:3480 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x4e/0x110 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0x97/0xb0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slab.c:3261 [inline] kmem_cache_alloc_node+0x2ea/0x590 mm/slab.c:3599 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] nlmsg_new include/net/netlink.h:953 [inline] rtmsg_ifinfo_build_skb+0x72/0x1a0 net/core/rtnetlink.c:3808 rtmsg_ifinfo_event net/core/rtnetlink.c:3844 [inline] rtmsg_ifinfo_event net/core/rtnetlink.c:3835 [inline] rtmsg_ifinfo+0x83/0x120 net/core/rtnetlink.c:3853 netdev_state_change net/core/dev.c:1395 [inline] netdev_state_change+0x114/0x130 net/core/dev.c:1386 linkwatch_do_dev+0x10e/0x150 net/core/link_watch.c:167 __linkwatch_run_queue+0x233/0x6a0 net/core/link_watch.c:213 linkwatch_event+0x4a/0x60 net/core/link_watch.c:252 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 Memory state around the buggy address: ffff8880716e5a80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8880716e5b00: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc >ffff8880716e5b80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff8880716e5c00: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8880716e5c80: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc Fixes: aaa31047a6d2 ("netfilter: nftables: add catch-all set element support") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0851fec11d46..c207728226372 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4481,9 +4481,9 @@ struct nft_set_elem_catchall { static void nft_set_catchall_destroy(const struct nft_ctx *ctx, struct nft_set *set) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *next, *catchall; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); nft_set_elem_destroy(set, catchall->elem, true); kfree_rcu(catchall); From ca4d8344a72b91fb9d4c8bfbc22204b4c09c5d8f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 9 Dec 2021 18:15:07 +0800 Subject: [PATCH 206/361] usb: typec: tcpm: fix tcpm unregister port but leave a pending timer In current design, when the tcpm port is unregisterd, the kthread_worker will be destroyed in the last step. Inside the kthread_destroy_worker(), the worker will flush all the works and wait for them to end. However, if one of the works calls hrtimer_start(), this hrtimer will be pending until timeout even though tcpm port is removed. Once the hrtimer timeout, many strange kernel dumps appear. Thus, we can first complete kthread_destroy_worker(), then cancel all the hrtimers. This will guarantee that no hrtimer is pending at the end. Fixes: 3ed8e1c2ac99 ("usb: typec: tcpm: Migrate workqueue to RT priority for processing events") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20211209101507.499096-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6010b99011261..59d4fa2443f2b 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -324,6 +324,7 @@ struct tcpm_port { bool attached; bool connected; + bool registered; bool pd_supported; enum typec_port_type port_type; @@ -6291,7 +6292,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, state_machine_timer); - kthread_queue_work(port->wq, &port->state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->state_machine); return HRTIMER_NORESTART; } @@ -6299,7 +6301,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time { struct tcpm_port *port = container_of(timer, struct tcpm_port, vdm_state_machine_timer); - kthread_queue_work(port->wq, &port->vdm_state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->vdm_state_machine); return HRTIMER_NORESTART; } @@ -6307,7 +6310,8 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, enable_frs_timer); - kthread_queue_work(port->wq, &port->enable_frs); + if (port->registered) + kthread_queue_work(port->wq, &port->enable_frs); return HRTIMER_NORESTART; } @@ -6315,7 +6319,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); - kthread_queue_work(port->wq, &port->send_discover_work); + if (port->registered) + kthread_queue_work(port->wq, &port->send_discover_work); return HRTIMER_NORESTART; } @@ -6403,6 +6408,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) typec_port_register_altmodes(port->typec_port, &tcpm_altmode_ops, port, port->port_altmode, ALTMODE_DISCOVERY_MAX); + port->registered = true; mutex_lock(&port->lock); tcpm_init(port); @@ -6424,6 +6430,9 @@ void tcpm_unregister_port(struct tcpm_port *port) { int i; + port->registered = false; + kthread_destroy_worker(port->wq); + hrtimer_cancel(&port->send_discover_timer); hrtimer_cancel(&port->enable_frs_timer); hrtimer_cancel(&port->vdm_state_machine_timer); @@ -6435,7 +6444,6 @@ void tcpm_unregister_port(struct tcpm_port *port) typec_unregister_port(port->typec_port); usb_role_switch_put(port->role_sw); tcpm_debugfs_exit(port); - kthread_destroy_worker(port->wq); } EXPORT_SYMBOL_GPL(tcpm_unregister_port); From ebb966d3bdfed581ecccbb4a7432341baf7619b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Fri, 10 Dec 2021 16:31:27 +0100 Subject: [PATCH 207/361] netfilter: fix regression in looped (broad|multi)cast's MAC handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5648b5e1169f ("netfilter: nfnetlink_queue: fix OOB when mac header was cleared"), the test for non-empty MAC header introduced in commit 2c38de4c1f8da7 ("netfilter: fix looped (broad|multi)cast's MAC handling") has been replaced with a test for a set MAC header. This breaks the case when the MAC header has been reset (using skb_reset_mac_header), as is the case with looped-back multicast packets. As a result, the packets ending up in NFQUEUE get a bogus hwaddr interpreted from the first bytes of the IP header. This patch adds a test for a non-empty MAC header in addition to the test for a set MAC header. The same two tests are also implemented in nfnetlink_log.c, where the initial code of commit 2c38de4c1f8da7 ("netfilter: fix looped (broad|multi)cast's MAC handling") has not been touched, but where supposedly the same situation may happen. Fixes: 5648b5e1169f ("netfilter: nfnetlink_queue: fix OOB when mac header was cleared") Signed-off-by: Ignacy Gawędzki Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/nfnetlink_queue.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 691ef4cffdd90..7f83f9697fc14 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -556,7 +556,8 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev && skb->dev && - skb->mac_header != skb->network_header) { + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) != 0) { struct nfulnl_msg_packet_hw phw; int len; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5837e8efc9c20..f0b9e21a24524 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -560,7 +560,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; if (indev && entskb->dev && - skb_mac_header_was_set(entskb)) { + skb_mac_header_was_set(entskb) && + skb_mac_header_len(entskb) != 0) { struct nfqnl_msg_packet_hw phw; int len; From 972ce7e3801e790bb348bfe98be1ab65af15bacd Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Wed, 15 Dec 2021 12:58:31 +0200 Subject: [PATCH 208/361] dpaa2-eth: fix ethtool statistics Unfortunately, with the blamed commit I also added a side effect in the ethtool stats shown. Because I added two more fields in the per channel structure without verifying if its size is used in any way, part of the ethtool statistics were off by 2. Fix this by not looking up the size of the structure but instead on a fixed value kept in a macro. Fixes: fc398bec0387 ("net: dpaa2: add adaptive interrupt coalescing") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20211215105831.290070-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 ++ drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 2085844227fe5..e54e70ebdd059 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -388,6 +388,8 @@ struct dpaa2_eth_ch_stats { __u64 bytes_per_cdan; }; +#define DPAA2_ETH_CH_STATS 7 + /* Maximum number of queues associated with a DPNI */ #define DPAA2_ETH_MAX_TCS 8 #define DPAA2_ETH_MAX_RX_QUEUES_PER_TC 16 diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index adb8ce5306ee8..3fdbf87dccb1e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -278,7 +278,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, /* Per-channel stats */ for (k = 0; k < priv->num_channels; k++) { ch_stats = &priv->channel[k]->stats; - for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64) - 1; j++) + for (j = 0; j < DPAA2_ETH_CH_STATS; j++) *((__u64 *)data + i + j) += *((__u64 *)ch_stats + j); } i += j; From 481221775d53d6215a6e5e9ce1cce6d2b4ab9a46 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Wed, 15 Dec 2021 19:15:30 +0800 Subject: [PATCH 209/361] netdevsim: Zero-initialize memory for new map's value in function nsim_bpf_map_alloc Zero-initialize memory for new map's value in function nsim_bpf_map_alloc since it may cause a potential kernel information leak issue, as follows: 1. nsim_bpf_map_alloc calls nsim_map_alloc_elem to allocate elements for a new map. 2. nsim_map_alloc_elem uses kmalloc to allocate map's value, but doesn't zero it. 3. A user application can use IOCTL BPF_MAP_LOOKUP_ELEM to get specific element's information in the map. 4. The kernel function map_lookup_elem will call bpf_map_copy_value to get the information allocated at step-2, then use copy_to_user to copy to the user buffer. This can only leak information for an array map. Fixes: 395cacb5f1a0 ("netdevsim: bpf: support fake map offload") Suggested-by: Jakub Kicinski Acked-by: Jakub Kicinski Signed-off-by: Haimin Zhang Link: https://lore.kernel.org/r/20211215111530.72103-1-tcs.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 90aafb56f1409..a438202129323 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -514,6 +514,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } From ec6af094ea28f0f2dda1a6a33b14cd57e36a9755 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 15 Dec 2021 09:39:37 -0500 Subject: [PATCH 210/361] net/packet: rx_owner_map depends on pg_vec Packet sockets may switch ring versions. Avoid misinterpreting state between versions, whose fields share a union. rx_owner_map is only allocated with a packet ring (pg_vec) and both are swapped together. If pg_vec is NULL, meaning no packet ring was allocated, then neither was rx_owner_map. And the field may be old state from a tpacket_v3. Fixes: 61fad6816fc1 ("net/packet: tpacket_rcv: avoid a producer race condition") Reported-by: Syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211215143937.106178-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 46943a18a10d5..76c2dca7f0a59 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4492,9 +4492,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } From ef8a0f6eab1ca5d1a75c242c5c7b9d386735fa0a Mon Sep 17 00:00:00 2001 From: Greg Jesionowski Date: Tue, 14 Dec 2021 15:10:27 -0700 Subject: [PATCH 211/361] net: usb: lan78xx: add Allied Telesis AT29M2-AF This adds the vendor and product IDs for the AT29M2-AF which is a lan7801-based device. Signed-off-by: Greg Jesionowski Link: https://lore.kernel.org/r/20211214221027.305784-1-jesionowskigreg@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8cd265fc1fd9d..075f8abde5cd7 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -76,6 +76,8 @@ #define LAN7801_USB_PRODUCT_ID (0x7801) #define LAN78XX_EEPROM_MAGIC (0x78A5) #define LAN78XX_OTP_MAGIC (0x78F3) +#define AT29M2AF_USB_VENDOR_ID (0x07C9) +#define AT29M2AF_USB_PRODUCT_ID (0x0012) #define MII_READ 1 #define MII_WRITE 0 @@ -4734,6 +4736,10 @@ static const struct usb_device_id products[] = { /* LAN7801 USB Gigabit Ethernet Device */ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID), }, + { + /* ATM2-AF USB Gigabit Ethernet Device */ + USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID), + }, {}, }; MODULE_DEVICE_TABLE(usb, products); From b67210cc217f9ca1c576909454d846970c13dfd4 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 15 Dec 2021 10:58:08 +0100 Subject: [PATCH 212/361] pinctrl: stm32: consider the GPIO offset to expose all the GPIO lines Consider the GPIO controller offset (from "gpio-ranges") to compute the maximum GPIO line number. This fixes an issue where gpio-ranges uses a non-null offset. e.g.: gpio-ranges = <&pinctrl 6 86 10> In that case the last valid GPIO line is not 9 but 15 (6 + 10 - 1) Cc: stable@vger.kernel.org Fixes: 67e2996f72c7 ("pinctrl: stm32: fix the reported number of GPIO lines per bank") Reported-by: Christoph Fritz Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20211215095808.621716-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 24764ebcc9368..9ed7647315707 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1251,10 +1251,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; - npins = args.args[2]; - while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, - ++i, &args)) - npins += args.args[2]; + /* get the last defined gpio line (offset + nb of pins) */ + npins = args.args[0] + args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args)) + npins = max(npins, (int)(args.args[0] + args.args[2])); } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; From 0fd08a34e8e3b67ec9bd8287ac0facf8374b844a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 213/361] xen/blkfront: harden blkfront against event channel storms The Xen blkfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/block/xen-blkfront.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8e3983e456f3c..286cf1afad781 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: @@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long id; unsigned int op; + eoiflag = 0; + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); id = bret.id; @@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; err: @@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + /* No EOI in order to avoid further interrupts. */ + pr_alert("%s disabled for further use\n", info->gd->disk_name); return IRQ_HANDLED; } @@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev, if (err) goto fail; - err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, - "blkif", rinfo); + err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, + 0, "blkif", rinfo); if (err <= 0) { xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler failed"); From b27d47950e481f292c0a5ad57357edb9d95d03ba Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 214/361] xen/netfront: harden netfront against event channel storms The Xen netfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. For being able to detect the case of no rx responses being added while the carrier is down a new lock is needed in order to update and test rsp_cons and the number of seen unconsumed responses atomically. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - don't eoi irq in case of interface set broken (Jan Beulich) - handle carrier off + no new responses added (Jan Beulich) V3: - add rx_ prefix to rsp_unconsumed (Jan Beulich) - correct xennet_set_rx_rsp_cons() spelling (Jan Beulich) --- drivers/net/xen-netfront.c | 125 ++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 31 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 911f43986a8c9..d514d96027a6f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -148,6 +148,9 @@ struct netfront_queue { grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; + struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; }; @@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; bool more_to_do; + bool work_done = false; const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); @@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) for (cons = queue->tx.rsp_cons; cons != prod; cons++) { struct xen_netif_tx_response txrsp; + work_done = true; + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); if (txrsp.status == XEN_NETIF_RSP_NULL) continue; @@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) xennet_maybe_wake_tx(queue); - return; + return work_done; err: queue->info->broken = true; dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { @@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue, xennet_move_rx_slot(queue, skb, ref); } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -1039,7 +1057,7 @@ static int xennet_get_responses(struct netfront_queue *queue, } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } @@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1229,7 +1248,9 @@ static int xennet_poll(struct napi_struct *napi, int budget) if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1253,7 +1274,8 @@ static int xennet_poll(struct napi_struct *napi, int budget) __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } if (need_xdp_flush) @@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } -static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int work_queued; + unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; + + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) napi_schedule(&queue->napi); + return true; +} + +static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); From fe415186b43df0db1f17fa3a46275fd92107fe71 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 215/361] xen/console: harden hvc_xen against event channel storms The Xen console driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using a lateeoi event channel. For the normal domU initial console this requires the introduction of bind_evtchn_to_irq_lateeoi() as there is no xenbus device available at the time the event channel is bound to the irq. As the decision whether an interrupt was spurious or not requires to test for bytes having been read from the backend, move sending the event into the if statement, as sending an event without having found any bytes to be read is making no sense at all. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - slightly adapt spurious irq detection (Jan Beulich) V3: - fix spurious irq detection (Jan Beulich) --- drivers/tty/hvc/hvc_xen.c | 30 +++++++++++++++++++++++++++--- drivers/xen/events/events_base.c | 6 ++++++ include/xen/events.h | 1 + 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 71e0dd2c0ce5b..ebaf7500f48f1 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -551,7 +575,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a78704ae36186..46d9295d9a6e4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1251,6 +1251,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; diff --git a/include/xen/events.h b/include/xen/events.h index c204262d9fc24..344081e71584b 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,6 +17,7 @@ struct xenbus_device; unsigned xen_evtchn_nr_channels(void); int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, From 6032046ec4b70176d247a71836186d47b25d1684 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:25:12 +0100 Subject: [PATCH 216/361] xen/netback: fix rx queue stall detection Commit 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") introduced a security problem in netback, as an interface would only be regarded to be stalled if no slot is available in the rx queue ring page. In case the SKB at the head of the queued requests will need more than one rx slot and only one slot is free the stall detection logic will never trigger, as the test for that is only looking for at least one slot to be free. Fix that by testing for the needed number of slots instead of only one slot being available. In order to not have to take the rx queue lock that often, store the number of needed slots in the queue data. As all SKB dequeue operations happen in the rx queue kernel thread this is safe, as long as the number of needed slots is accessed via READ/WRITE_ONCE() only and updates are always done with the rx queue lock held. Add a small helper for obtaining the number of free slots. This is part of XSA-392 Fixes: 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/rx.c | 65 ++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 4a16d6e33c093..d9dea4829c86e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index accc991d153f7..a8511e27d6c13 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ #include #include -static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) { - RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - unsigned long flags; - - spin_lock_irqsave(&queue->rx_queue.lock, flags); + unsigned int needed = 0; - skb = skb_peek(&queue->rx_queue); - if (!skb) { - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); - return false; + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; } - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; + WRITE_ONCE(queue->rx_slots_needed, needed); +} - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + unsigned int needed; + + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) + return false; do { prod = queue->rx.sring->req_prod; @@ -80,6 +88,9 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + __skb_queue_tail(&queue->rx_queue, skb); queue->rx_queue_len += skb->len; @@ -100,6 +111,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -487,27 +500,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; - - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) From be81992f9086b230623ae3ebbc85ecee4d00a3d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 30 Nov 2021 08:36:12 +0100 Subject: [PATCH 217/361] xen/netback: don't queue unlimited number of packages In case a guest isn't consuming incoming network traffic as fast as it is coming in, xen-netback is buffering network packages in unlimited numbers today. This can result in host OOM situations. Commit f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") meant to introduce a mechanism to limit the amount of buffered data by stopping the Tx queue when reaching the data limit, but this doesn't work for cases like UDP. When hitting the limit don't queue further SKBs, but drop them instead. In order to be able to tell Rx packages have been dropped increment the rx_dropped statistics counter in this case. It should be noted that the old solution to continue queueing SKBs had the additional problem of an overflow of the 32-bit rx_queue_len value would result in intermittent Tx queue enabling. This is part of XSA-392 Fixes: f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/rx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index a8511e27d6c13..dbac4c03d21a1 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -88,16 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); - if (skb_queue_empty(&queue->rx_queue)) - xenvif_update_needed_slots(queue, skb); - - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); @@ -147,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } From 0546b224cc7717cc8a2db076b0bb069a9c430794 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 14 Dec 2021 19:10:09 +0000 Subject: [PATCH 218/361] net: stmmac: dwmac-rk: fix oob read in rk_gmac_setup KASAN reports an out-of-bounds read in rk_gmac_setup on the line: while (ops->regs[i]) { This happens for most platforms since the regs flexible array member is empty, so the memory after the ops structure is being read here. It seems that mostly this happens to contain zero anyway, so we get lucky and everything still works. To avoid adding redundant data to nearly all the ops structures, add a new flag to indicate whether the regs field is valid and avoid this loop when it is not. Fixes: 3bb3d6b1c195 ("net: stmmac: Add RK3566/RK3568 SoC support") Signed-off-by: John Keeping Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6924a6aacbd53..c469abc91fa1b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -33,6 +33,7 @@ struct rk_gmac_ops { void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv); + bool regs_valid; u32 regs[]; }; @@ -1092,6 +1093,7 @@ static const struct rk_gmac_ops rk3568_ops = { .set_to_rmii = rk3568_set_to_rmii, .set_rgmii_speed = rk3568_set_gmac_speed, .set_rmii_speed = rk3568_set_gmac_speed, + .regs_valid = true, .regs = { 0xfe2a0000, /* gmac0 */ 0xfe010000, /* gmac1 */ @@ -1383,7 +1385,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, * to be distinguished. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) { + if (res && ops->regs_valid) { int i = 0; while (ops->regs[i]) { From 407ecd1bd726f240123f704620d46e285ff30dd9 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 22:37:31 +0800 Subject: [PATCH 219/361] sfc_ef100: potential dereference of null pointer The return value of kmalloc() needs to be checked. To avoid use in efx_nic_update_stats() in case of the failure of alloc. Fixes: b593b6f1b492 ("sfc_ef100: statistics gathering") Signed-off-by: Jiasheng Jiang Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 6aa81229b68a9..e77a5cb4e40d7 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -609,6 +609,9 @@ static size_t ef100_update_stats(struct efx_nic *efx, ef100_common_stat_mask(mask); ef100_ethtool_stat_mask(mask); + if (!mc_stats) + return 0; + efx_nic_copy_stats(efx, mc_stats); efx_nic_update_stats(ef100_stat_desc, EF100_STAT_COUNT, mask, stats, mc_stats, false); From e08cdf63049b711099efff0811273449083bb958 Mon Sep 17 00:00:00 2001 From: Andrey Eremeev Date: Wed, 15 Dec 2021 20:30:32 +0300 Subject: [PATCH 220/361] dsa: mv88e6xxx: fix debug print for SPEED_UNFORCED Debug print uses invalid check to detect if speed is unforced: (speed != SPEED_UNFORCED) should be used instead of (!speed). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Andrey Eremeev Fixes: 96a2b40c7bd3 ("net: dsa: mv88e6xxx: add port's MAC speed setter") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index d9817b20ea641..ab41619a809b3 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -283,7 +283,7 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); @@ -516,7 +516,7 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); From 053c9e18c6f9cf82242ef35ac21cae1842725714 Mon Sep 17 00:00:00 2001 From: Wenliang Wang Date: Thu, 16 Dec 2021 11:11:35 +0800 Subject: [PATCH 221/361] virtio_net: fix rx_drops stat for small pkts We found the stat of rx drops for small pkts does not increment when build_skb fail, it's not coherent with other mode's rx drops stat. Signed-off-by: Wenliang Wang Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 55db6a336f7ea..b107835242ade 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -733,7 +733,7 @@ static struct sk_buff *receive_small(struct net_device *dev, pr_debug("%s: rx error: len %u exceeds max size %d\n", dev->name, len, GOOD_PACKET_LEN); dev->stats.rx_length_errors++; - goto err_len; + goto err; } if (likely(!vi->xdp_enabled)) { @@ -825,10 +825,8 @@ static struct sk_buff *receive_small(struct net_device *dev, skip_xdp: skb = build_skb(buf, buflen); - if (!skb) { - put_page(page); + if (!skb) goto err; - } skb_reserve(skb, headroom - delta); skb_put(skb, len); if (!xdp_prog) { @@ -839,13 +837,12 @@ static struct sk_buff *receive_small(struct net_device *dev, if (metasize) skb_metadata_set(skb, metasize); -err: return skb; err_xdp: rcu_read_unlock(); stats->xdp_drops++; -err_len: +err: stats->drops++; put_page(page); xdp_xmit: From 8a03ef676ade55182f9b05115763aeda6dc08159 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 16 Dec 2021 11:28:25 +0200 Subject: [PATCH 222/361] net: Fix double 0x prefix print in SKB dump When printing netdev features %pNF already takes care of the 0x prefix, remove the explicit one. Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data") Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ba2f38246f07e..909db87d7383d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -832,7 +832,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", From 76f12e632a15a20c8de3532d64a0708cf0e32f11 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Dec 2021 17:39:26 +0100 Subject: [PATCH 223/361] netfilter: ctnetlink: remove expired entries first When dumping conntrack table to userspace via ctnetlink, check if the ct has already expired before doing any of the 'skip' checks. This expires dead entries faster. /proc handler also removes outdated entries first. Reported-by: Vitaly Zuevsky Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 81d03acf68d4d..ec4164c32d270 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1195,8 +1195,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { if (i < ARRAY_SIZE(nf_ct_evict) && @@ -1208,6 +1206,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) if (!net_eq(net, nf_ct_net(ct))) continue; + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + if (cb->args[1]) { if (ct != last) continue; From 5c15b3123f65f8fbb1b445d9a7e8812e0e435df2 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 15 Dec 2021 20:29:21 +0800 Subject: [PATCH 224/361] net/smc: Prevent smc_release() from long blocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In nginx/wrk benchmark, there's a hung problem with high probability on case likes that: (client will last several minutes to exit) server: smc_run nginx client: smc_run wrk -c 10000 -t 1 http://server Client hangs with the following backtrace: 0 [ffffa7ce8Of3bbf8] __schedule at ffffffff9f9eOd5f 1 [ffffa7ce8Of3bc88] schedule at ffffffff9f9eløe6 2 [ffffa7ce8Of3bcaO] schedule_timeout at ffffffff9f9e3f3c 3 [ffffa7ce8Of3bd2O] wait_for_common at ffffffff9f9el9de 4 [ffffa7ce8Of3bd8O] __flush_work at ffffffff9fOfeOl3 5 [ffffa7ce8øf3bdfO] smc_release at ffffffffcO697d24 [smc] 6 [ffffa7ce8Of3be2O] __sock_release at ffffffff9f8O2e2d 7 [ffffa7ce8Of3be4ø] sock_close at ffffffff9f8ø2ebl 8 [ffffa7ce8øf3be48] __fput at ffffffff9f334f93 9 [ffffa7ce8Of3be78] task_work_run at ffffffff9flOlff5 10 [ffffa7ce8Of3beaO] do_exit at ffffffff9fOe5Ol2 11 [ffffa7ce8Of3bflO] do_group_exit at ffffffff9fOe592a 12 [ffffa7ce8Of3bf38] __x64_sys_exit_group at ffffffff9fOe5994 13 [ffffa7ce8Of3bf4O] do_syscall_64 at ffffffff9f9d4373 14 [ffffa7ce8Of3bfsO] entry_SYSCALL_64_after_hwframe at ffffffff9fa0007c This issue dues to flush_work(), which is used to wait for smc_connect_work() to finish in smc_release(). Once lots of smc_connect_work() was pending or all executing work dangling, smc_release() has to block until one worker comes to free, which is equivalent to wait another smc_connnect_work() to finish. In order to fix this, There are two changes: 1. For those idle smc_connect_work(), cancel it from the workqueue; for executing smc_connect_work(), waiting for it to finish. For that purpose, replace flush_work() with cancel_work_sync(). 2. Since smc_connect() hold a reference for passive closing, if smc_connect_work() has been cancelled, release the reference. Fixes: 24ac3a08e658 ("net/smc: rebuild nonblocking connect") Reported-by: Tony Lu Tested-by: Dust Li Reviewed-by: Dust Li Reviewed-by: Tony Lu Signed-off-by: D. Wythe Acked-by: Karsten Graul Link: https://lore.kernel.org/r/1639571361-101128-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 230072f9ec48e..1c9289f56dc47 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -194,7 +194,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires From 8b8e6e782456f1ce02a7ae914bbd5b1053f0b034 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Dec 2021 12:24:49 -0800 Subject: [PATCH 225/361] net: systemport: Add global locking for descriptor lifecycle The descriptor list is a shared resource across all of the transmit queues, and the locking mechanism used today only protects concurrency across a given transmit queue between the transmit and reclaiming. This creates an opportunity for the SYSTEMPORT hardware to work on corrupted descriptors if we have multiple producers at once which is the case when using multiple transmit queues. This was particularly noticeable when using multiple flows/transmit queues and it showed up in interesting ways in that UDP packets would get a correct UDP header checksum being calculated over an incorrect packet length. Similarly TCP packets would get an equally correct checksum computed by the hardware over an incorrect packet length. The SYSTEMPORT hardware maintains an internal descriptor list that it re-arranges when the driver produces a new descriptor anytime it writes to the WRITE_PORT_{HI,LO} registers, there is however some delay in the hardware to re-organize its descriptors and it is possible that concurrent TX queues eventually break this internal allocation scheme to the point where the length/status part of the descriptor gets used for an incorrect data buffer. The fix is to impose a global serialization for all TX queues in the short section where we are writing to the WRITE_PORT_{HI,LO} registers which solves the corruption even with multiple concurrent TX queues being used. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20211215202450.4086240-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++++- drivers/net/ethernet/broadcom/bcmsysport.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 40933bf5a7100..60dde29974bfe 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1309,11 +1309,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct bcm_sysport_tx_ring *ring; + unsigned long flags, desc_flags; struct bcm_sysport_cb *cb; struct netdev_queue *txq; u32 len_status, addr_lo; unsigned int skb_len; - unsigned long flags; dma_addr_t mapping; u16 queue; int ret; @@ -1373,8 +1373,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, ring->desc_count--; /* Ports are latched, so write upper address first */ + spin_lock_irqsave(&priv->desc_lock, desc_flags); tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index)); tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index)); + spin_unlock_irqrestore(&priv->desc_lock, desc_flags); /* Check ring space and update SW control flow */ if (ring->desc_count == 0) @@ -2013,6 +2015,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ + spin_lock_init(&priv->desc_lock); for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 984f76e74b43e..16b73bb9acc78 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -711,6 +711,7 @@ struct bcm_sysport_priv { int wol_irq; /* Transmit rings */ + spinlock_t desc_lock; struct bcm_sysport_tx_ring *tx_rings; /* Receive queue */ From e28587cc491ef0f3c51258fdc87fbc386b1d4c59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2021 03:17:41 -0800 Subject: [PATCH 226/361] sit: do not call ipip6_dev_free() from sit_init_net() ipip6_dev_free is sit dev->priv_destructor, already called by register_netdevice() if something goes wrong. Alternative would be to make ipip6_dev_free() robust against multiple invocations, but other drivers do not implement this strategy. syzbot reported: dst_release underflow WARNING: CPU: 0 PID: 5059 at net/core/dst.c:173 dst_release+0xd8/0xe0 net/core/dst.c:173 Modules linked in: CPU: 1 PID: 5059 Comm: syz-executor.4 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:dst_release+0xd8/0xe0 net/core/dst.c:173 Code: 4c 89 f2 89 d9 31 c0 5b 41 5e 5d e9 da d5 44 f9 e8 1d 90 5f f9 c6 05 87 48 c6 05 01 48 c7 c7 80 44 99 8b 31 c0 e8 e8 67 29 f9 <0f> 0b eb 85 0f 1f 40 00 53 48 89 fb e8 f7 8f 5f f9 48 83 c3 a8 48 RSP: 0018:ffffc9000aa5faa0 EFLAGS: 00010246 RAX: d6894a925dd15a00 RBX: 00000000ffffffff RCX: 0000000000040000 RDX: ffffc90005e19000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 0000000000000000 R08: ffffffff816a1f42 R09: ffffed1017344f2c R10: ffffed1017344f2c R11: 0000000000000000 R12: 0000607f462b1358 R13: 1ffffffff1bfd305 R14: ffffe8ffffcb1358 R15: dffffc0000000000 FS: 00007f66c71a2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f88aaed5058 CR3: 0000000023e0f000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dst_cache_destroy+0x107/0x1e0 net/core/dst_cache.c:160 ipip6_dev_free net/ipv6/sit.c:1414 [inline] sit_init_net+0x229/0x550 net/ipv6/sit.c:1936 ops_init+0x313/0x430 net/core/net_namespace.c:140 setup_net+0x35b/0x9d0 net/core/net_namespace.c:326 copy_net_ns+0x359/0x5c0 net/core/net_namespace.c:470 create_new_namespaces+0x4ce/0xa00 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x11e/0x180 kernel/nsproxy.c:226 ksys_unshare+0x57d/0xb50 kernel/fork.c:3075 __do_sys_unshare kernel/fork.c:3146 [inline] __se_sys_unshare kernel/fork.c:3144 [inline] __x64_sys_unshare+0x34/0x40 kernel/fork.c:3144 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f66c882ce99 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f66c71a2168 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f66c893ff60 RCX: 00007f66c882ce99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000048040200 RBP: 00007f66c8886ff1 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff6634832f R14: 00007f66c71a2300 R15: 0000000000022000 Fixes: cf124db566e6 ("net: Fix inconsistent teardown and release of private netdev state.") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211216111741.1387540-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b57ee36d6682..8a3618a30632a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1933,7 +1933,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; From 1744a22ae948799da7927b53ec97ccc877ff9d61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Dec 2021 09:22:12 +0000 Subject: [PATCH 227/361] afs: Fix mmap Fix afs_add_open_map() to check that the vnode isn't already on the list when it adds it. It's possible that afs_drop_open_mmap() decremented the cb_nr_mmap counter, but hadn't yet got into the locked section to remove it. Also vnode->cb_mmap_link should be initialised, so fix that too. Fixes: 6e0e99d58a65 ("afs: Fix mmap coherency vs 3rd-party changes") Reported-by: kafs-testing+fedora34_64checkkafs-build-300@auristor.com Suggested-by: Marc Dionne Signed-off-by: David Howells Tested-by: kafs-testing+fedora34_64checkkafs-build-300@auristor.com cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/686465.1639435380@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/afs/file.c | 5 +++-- fs/afs/super.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index cb6ad61eec3bf..afe4b803f84b4 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -514,8 +514,9 @@ static void afs_add_open_mmap(struct afs_vnode *vnode) if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { down_write(&vnode->volume->cell->fs_open_mmaps_lock); - list_add_tail(&vnode->cb_mmap_link, - &vnode->volume->cell->fs_open_mmaps); + if (list_empty(&vnode->cb_mmap_link)) + list_add_tail(&vnode->cb_mmap_link, + &vnode->volume->cell->fs_open_mmaps); up_write(&vnode->volume->cell->fs_open_mmaps_lock); } diff --git a/fs/afs/super.c b/fs/afs/super.c index d110def8aa8eb..34c68724c98be 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -667,6 +667,7 @@ static void afs_i_init_once(void *_vnode) INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); + INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } From 9c5d89bc10551f1aecd768b00fca3339a7b8c8ee Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Thu, 9 Dec 2021 17:01:21 -0800 Subject: [PATCH 228/361] arm64: kexec: Fix missing error code 'ret' warning in load_other_segments() Since commit ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()"), smatch reports the following warning: arch/arm64/kernel/machine_kexec_file.c:152 load_other_segments() warn: missing error code 'ret' Return code is not set to an error code in load_other_segments() when of_kexec_alloc_and_setup_fdt() call returns a NULL dtb. This results in status success (return code set to 0) being returned from load_other_segments(). Set return code to -EINVAL if of_kexec_alloc_and_setup_fdt() returns NULL dtb. Signed-off-by: Lakshmi Ramasubramanian Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()") Link: https://lore.kernel.org/r/20211210010121.101823-1-nramas@linux.microsoft.com Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/machine_kexec_file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 63634b4d72c15..59c648d518488 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -149,6 +149,7 @@ int load_other_segments(struct kimage *image, initrd_len, cmdline, 0); if (!dtb) { pr_err("Preparing for new dtb failed\n"); + ret = -EINVAL; goto out_err; } From ef399469d9ceb9f2171cdd79863f9434b9fa3edc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Nov 2021 15:50:47 +0300 Subject: [PATCH 229/361] ksmbd: fix error code in ndr_read_int32() This is a failure path and it should return -EINVAL instead of success. Otherwise it could result in the caller using uninitialized memory. Fixes: 303fff2b8c77 ("ksmbd: add validation for ndr read/write functions") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/ksmbd/ndr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c index 8317f7ca402b4..5052be9261d91 100644 --- a/fs/ksmbd/ndr.c +++ b/fs/ksmbd/ndr.c @@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value) static int ndr_read_int32(struct ndr *n, __u32 *value) { if (n->offset + sizeof(__u32) > n->length) - return 0; + return -EINVAL; if (value) *value = le32_to_cpu(*(__le32 *)ndr_get_field(n)); From f2e78affc48dee29b989c1d9b0d89b503dcd1204 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Dec 2021 10:12:39 +0900 Subject: [PATCH 230/361] ksmbd: fix uninitialized symbol 'pntsd_size' No check for if "rc" is an error code for build_sec_desc(). This can cause problems with using uninitialized pntsd_size. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 49c9da37315c8..125590d5e9402 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2962,6 +2962,10 @@ int smb2_open(struct ksmbd_work *work) &pntsd_size, &fattr); posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); + if (rc) { + kfree(pntsd); + goto err_out; + } rc = ksmbd_vfs_set_sd_xattr(conn, user_ns, From 3cf2b61eb06765e27fec6799292d9fb46d0b7e60 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:02:19 +0000 Subject: [PATCH 231/361] bpf: Fix signed bounds propagation after mov32 For the case where both s32_{min,max}_value bounds are positive, the __reg_assign_32_into_64() directly propagates them to their 64 bit counterparts, otherwise it pessimises them into [0,u32_max] universe and tries to refine them later on by learning through the tnum as per comment in mentioned function. However, that does not always happen, for example, in mov32 operation we call zext_32_to_64(dst_reg) which invokes the __reg_assign_32_into_64() as is without subsequent bounds update as elsewhere thus no refinement based on tnum takes place. Thus, not calling into the __update_reg_bounds() / __reg_deduce_bounds() / __reg_bound_offset() triplet as we do, for example, in case of ALU ops via adjust_scalar_min_max_vals(), will lead to more pessimistic bounds when dumping the full register state: Before fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=0,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Technically, the smin_value=0 and smax_value=4294967295 bounds are not incorrect, but given the register is still a constant, they break assumptions about const scalars that smin_value == smax_value and umin_value == umax_value. After fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Without the smin_value == smax_value and umin_value == umax_value invariant being intact for const scalars, it is possible to leak out kernel pointers from unprivileged user space if the latter is enabled. For example, when such registers are involved in pointer arithmtics, then adjust_ptr_min_max_vals() will taint the destination register into an unknown scalar, and the latter can be exported and stored e.g. into a BPF map value. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Kuee K1r0a Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2d48159b58bd3..0872b6c9fb33f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8317,6 +8317,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); } } else { /* case: R = imm From e572ff80f05c33cd0cb4860f864f5c9c044280b6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:28:48 +0000 Subject: [PATCH 232/361] bpf: Make 32->64 bounds propagation slightly more robust Make the bounds propagation in __reg_assign_32_into_64() slightly more robust and readable by aligning it similarly as we did back in the __reg_combine_64_into_32() counterpart. Meaning, only propagate or pessimize them as a smin/smax pair. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0872b6c9fb33f..b532f1058d35f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1366,22 +1366,28 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) From b1a7288dedc6caf9023f2676b4f5ed34cf0d4029 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 23:48:54 +0000 Subject: [PATCH 233/361] bpf, selftests: Add test case trying to taint map value pointer Add a test case which tries to taint map value pointer arithmetic into a unknown scalar with subsequent export through the map. Before fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 24 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 #1186/p map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 8 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 Summary: 0 PASSED, 0 SKIPPED, 2 FAILED After fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg OK #1186/p map access: trying to leak tained dst reg OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 2debba4e8a3a8..4d347bc53aa28 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1077,6 +1077,29 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "map access: trying to leak tained dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF), + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 4 }, + .result = REJECT, + .errstr = "math between map_value pointer and 4294967295 is not allowed", +}, { "32bit pkt_ptr -= scalar", .insns = { From 433956e91200734d09958673a56df02d00a917c2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 18:38:30 -0800 Subject: [PATCH 234/361] bpf: Fix extable fixup offset. The prog - start_of_ldx is the offset before the faulting ldx to the location after it, so this will be used to adjust pt_regs->ip for jumping over it and continuing, and with old temp it would have been fixed up to the wrong offset, causing crash. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Signed-off-by: Alexei Starovoitov Reviewed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 726700fabca6d..fa58681db45e4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1305,7 +1305,7 @@ st: if (is_imm8(insn->off)) * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" * of 4 bytes will be ignored and rbx will be zero inited. */ - ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8); } break; From 588a25e92458c6efeb7a261d5ca5726f5de89184 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Dec 2021 19:25:13 -0800 Subject: [PATCH 235/361] bpf: Fix extable address check. The verifier checks that PTR_TO_BTF_ID pointer is either valid or NULL, but it cannot distinguish IS_ERR pointer from valid one. When offset is added to IS_ERR pointer it may become small positive value which is a user address that is not handled by extable logic and has to be checked for at the runtime. Tighten BPF_PROBE_MEM pointer check code to prevent this case. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Reported-by: Lorenzo Fontana Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 49 +++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fa58681db45e4..bafe36e69227d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1252,19 +1252,54 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { - /* test src_reg, src_reg */ - maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */ - EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg)); - /* jne start_of_ldx */ - EMIT2(X86_JNE, 0); + /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM + * add abs(insn->off) to the limit to make sure that negative + * offset won't be an issue. + * insn->off is s16, so it won't affect valid pointers. + */ + u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off); + u8 *end_of_jmp1, *end_of_jmp2; + + /* Conservatively check that src_reg + insn->off is a kernel address: + * 1. src_reg + insn->off >= limit + * 2. src_reg + insn->off doesn't become small positive. + * Cannot do src_reg + insn->off >= limit in one branch, + * since it needs two spare registers, but JIT has only one. + */ + + /* movabsq r11, limit */ + EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); + EMIT((u32)limit, 4); + EMIT(limit >> 32, 4); + /* cmp src_reg, r11 */ + maybe_emit_mod(&prog, src_reg, AUX_REG, true); + EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); + /* if unsigned '<' goto end_of_jmp2 */ + EMIT2(X86_JB, 0); + end_of_jmp1 = prog; + + /* mov r11, src_reg */ + emit_mov_reg(&prog, true, AUX_REG, src_reg); + /* add r11, insn->off */ + maybe_emit_1mod(&prog, AUX_REG, true); + EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); + /* jmp if not carry to start_of_ldx + * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr + * that has to be rejected. + */ + EMIT2(0x73 /* JNC */, 0); + end_of_jmp2 = prog; + /* xor dst_reg, dst_reg */ emit_mov_imm32(&prog, false, dst_reg, 0); /* jmp byte_after_ldx */ EMIT2(0xEB, 0); - /* populate jmp_offset for JNE above */ - temp[4] = prog - temp - 5 /* sizeof(test + jne) */; + /* populate jmp_offset for JB above to jump to xor dst_reg */ + end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1; + /* populate jmp_offset for JNC above to jump to start_of_ldx */ start_of_ldx = prog; + end_of_jmp2[-1] = start_of_ldx - end_of_jmp2; } emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { From 7edc3fcbf9a2b2e3df53c9656a9f85bf807affac Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 12:35:34 -0800 Subject: [PATCH 236/361] selftest/bpf: Add a test that reads various addresses. Add a function to bpf_testmod that returns invalid kernel and user addresses. Then attach an fexit program to that function that tries to read memory through these addresses. This logic checks that bpf_probe_read_kernel and BPF_PROBE_MEM logic is sane. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 20 +++++++++++++++++++ .../selftests/bpf/progs/test_module_attach.c | 12 +++++++++++ 2 files changed, 32 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5d52ea2768df4..df3b292a8ffec 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -33,6 +33,22 @@ noinline int bpf_testmod_loop_test(int n) return sum; } +__weak noinline struct file *bpf_testmod_return_ptr(int arg) +{ + static struct file f = {}; + + switch (arg) { + case 1: return (void *)EINVAL; /* user addr */ + case 2: return (void *)0xcafe4a11; /* user addr */ + case 3: return (void *)-EINVAL; /* canonical, but invalid */ + case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ + case 5: return (void *)~(1ull << 30); /* trigger extable */ + case 6: return &f; /* valid addr */ + case 7: return (void *)((long)&f | 1); /* kernel tricks */ + default: return NULL; + } +} + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -43,6 +59,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; + int i = 1; + + while (bpf_testmod_return_ptr(i)) + i++; /* This is always true. Use the check to make sure the compiler * doesn't remove bpf_testmod_loop_test. diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index b36857093f71f..50ce16d02da7b 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -87,6 +87,18 @@ int BPF_PROG(handle_fexit, return 0; } +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret, int arg, struct file *ret) +{ + long buf = 0; + + bpf_probe_read_kernel(&buf, 8, ret); + bpf_probe_read_kernel(&buf, 8, (char *)ret + 256); + *(volatile long long *)ret; + *(volatile int *)&ret->f_mode; + return 0; +} + __u32 fmod_ret_read_sz = 0; SEC("fmod_ret/bpf_testmod_test_read") From c2fcbf81c332b42382a0c439bfe2414a241e4f5b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 16 Dec 2021 11:16:30 -0800 Subject: [PATCH 237/361] bpf, selftests: Fix racing issue in btf_skc_cls_ingress test The libbpf CI reported occasional failure in btf_skc_cls_ingress: test_syncookie:FAIL:Unexpected syncookie states gen_cookie:80326634 recv_cookie:0 bpf prog error at line 97 "error at line 97" means the bpf prog cannot find the listening socket when the final ack is received. It then skipped processing the syncookie in the final ack which then led to "recv_cookie:0". The problem is the userspace program did not do accept() and went ahead to close(listen_fd) before the kernel (and the bpf prog) had a chance to process the final ack. The fix is to add accept() call so that the userspace will wait for the kernel to finish processing the final ack first before close()-ing everything. Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211216191630.466151-1-kafai@fb.com --- .../bpf/prog_tests/btf_skc_cls_ingress.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 762f6a9da8b5e..664ffc0364f4f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -90,7 +90,7 @@ static void print_err_line(void) static void test_conn(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -112,6 +112,10 @@ static void test_conn(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port || skel->bss->req_sk_sport != srv_port, "Unexpected sk src port", @@ -134,11 +138,13 @@ static void test_conn(void) close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } static void test_syncookie(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -161,6 +167,10 @@ static void test_syncookie(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port, "Unexpected tp src port", "listen_tp_sport:%u expected:%u\n", @@ -188,6 +198,8 @@ static void test_syncookie(void) close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } struct test { From cc274ae7763d9700a56659f3228641d7069e7a3f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 15 Dec 2021 16:28:40 -0500 Subject: [PATCH 238/361] selinux: fix sleeping function called from invalid context selinux_sb_mnt_opts_compat() is called via sget_fc() under the sb_lock spinlock, so it can't use GFP_KERNEL allocations: [ 868.565200] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 868.568246] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 4914, name: mount.nfs [ 868.569626] preempt_count: 1, expected: 0 [ 868.570215] RCU nest depth: 0, expected: 0 [ 868.570809] Preemption disabled at: [ 868.570810] [<0000000000000000>] 0x0 [ 868.571848] CPU: 1 PID: 4914 Comm: mount.nfs Kdump: loaded Tainted: G W 5.16.0-rc5.2585cf9dfa #1 [ 868.573273] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 [ 868.574478] Call Trace: [ 868.574844] [ 868.575156] dump_stack_lvl+0x34/0x44 [ 868.575692] __might_resched.cold+0xd6/0x10f [ 868.576308] slab_pre_alloc_hook.constprop.0+0x89/0xf0 [ 868.577046] __kmalloc_track_caller+0x72/0x420 [ 868.577684] ? security_context_to_sid_core+0x48/0x2b0 [ 868.578569] kmemdup_nul+0x22/0x50 [ 868.579108] security_context_to_sid_core+0x48/0x2b0 [ 868.579854] ? _nfs4_proc_pathconf+0xff/0x110 [nfsv4] [ 868.580742] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.581355] security_context_str_to_sid+0x36/0x40 [ 868.581960] selinux_sb_mnt_opts_compat+0xb5/0x1e0 [ 868.582550] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.583098] security_sb_mnt_opts_compat+0x2a/0x40 [ 868.583676] nfs_compare_super+0x113/0x220 [nfs] [ 868.584249] ? nfs_try_mount_request+0x210/0x210 [nfs] [ 868.584879] sget_fc+0xb5/0x2f0 [ 868.585267] nfs_get_tree_common+0x91/0x4a0 [nfs] [ 868.585834] vfs_get_tree+0x25/0xb0 [ 868.586241] fc_mount+0xe/0x30 [ 868.586605] do_nfs4_mount+0x130/0x380 [nfsv4] [ 868.587160] nfs4_try_get_tree+0x47/0xb0 [nfsv4] [ 868.587724] vfs_get_tree+0x25/0xb0 [ 868.588193] do_new_mount+0x176/0x310 [ 868.588782] __x64_sys_mount+0x103/0x140 [ 868.589388] do_syscall_64+0x3b/0x90 [ 868.589935] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 868.590699] RIP: 0033:0x7f2b371c6c4e [ 868.591239] Code: 48 8b 0d dd 71 0e 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d aa 71 0e 00 f7 d8 64 89 01 48 [ 868.593810] RSP: 002b:00007ffc83775d88 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 868.594691] RAX: ffffffffffffffda RBX: 00007ffc83775f10 RCX: 00007f2b371c6c4e [ 868.595504] RDX: 0000555d517247a0 RSI: 0000555d51724700 RDI: 0000555d51724540 [ 868.596317] RBP: 00007ffc83775f10 R08: 0000555d51726890 R09: 0000555d51726890 [ 868.597162] R10: 0000000000000000 R11: 0000000000000246 R12: 0000555d51726890 [ 868.598005] R13: 0000000000000003 R14: 0000555d517246e0 R15: 0000555d511ac925 [ 868.598826] Cc: stable@vger.kernel.org Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount") Signed-off-by: Scott Mayhew [PM: cleanup/line-wrap the backtrace] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 62d30c0a30c29..1afc06ffd969f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -611,10 +611,11 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid, + gfp_t gfp) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, GFP_KERNEL); + sid, gfp); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -685,7 +686,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -694,7 +696,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid); + rc = parse_sid(sb, opts->context, &context_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -703,7 +706,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -712,7 +716,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -2702,14 +2707,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2719,14 +2724,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) return 1; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) @@ -2749,14 +2754,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2765,14 +2770,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) From 5da5231bb47864e5dd6c6731151e98b6ee498827 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Tue, 14 Dec 2021 09:45:10 -0500 Subject: [PATCH 239/361] libata: if T_LENGTH is zero, dma direction should be DMA_NONE Avoid data corruption by rejecting pass-through commands where T_LENGTH is zero (No data is transferred) and the dma direction is not DMA_NONE. Cc: Reported-by: syzkaller Signed-off-by: George Kennedy Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1b84d5526d77a..313e9475507b5 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2859,8 +2859,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } - if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) - tf->protocol = ATA_PROT_NCQ_NODATA; + if ((cdb[2 + cdb_offset] & 0x3) == 0) { + /* + * When T_LENGTH is zero (No data is transferred), dir should + * be DMA_NONE. + */ + if (scmd->sc_data_direction != DMA_NONE) { + fp = 2 + cdb_offset; + goto invalid_fld; + } + + if (ata_is_ncq(tf->protocol)) + tf->protocol = ATA_PROT_NCQ_NODATA; + } /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; From 27750a315aba7e6675bb1c3dfd4481c4f6888af1 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 17 Nov 2021 14:30:34 +0000 Subject: [PATCH 240/361] crypto: qat - do not handle PFVF sources for qat_4xxx The QAT driver does not have support for PFVF interrupts for GEN4 devices, therefore report the vf2pf sources as 0. This prevents a NULL pointer dereference in the function adf_msix_isr_ae() if the device triggers a spurious interrupt. Fixes: 993161d36ab5 ("crypto: qat - fix handling of VF to PF interrupts") Reported-by: Adam Guerin Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index fa768f10635fd..fd29861526d6b 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -211,6 +211,12 @@ static u32 uof_get_ae_mask(u32 obj_num) return adf_4xxx_fw_config[obj_num].ae_mask; } +static u32 get_vf2pf_sources(void __iomem *pmisc_addr) +{ + /* For the moment do not report vf2pf sources */ + return 0; +} + void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &adf_4xxx_class; @@ -254,6 +260,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; hw_data->enable_pfvf_comms = pfvf_comms_disabled; + hw_data->get_vf2pf_sources = get_vf2pf_sources; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; From ea81b91e4e256b0bb75d47ad3a5c230b2171a005 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:37 +0000 Subject: [PATCH 241/361] riscv: dts: sifive unmatched: Name gpio lines Follow the pin descriptions given in the version 3 of the board schematics. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 3c796d64cf51f..f8648ee1785ac 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -247,4 +247,8 @@ &gpio { status = "okay"; + gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3", + "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN", + "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4", + "EN_VDD_SD", "SD_CD"; }; From 8120393b74b31bbaf293f59896de6b0d50febc48 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:38 +0000 Subject: [PATCH 242/361] riscv: dts: sifive unmatched: Expose the board ID eeprom Mark it as read-only as it is factory-programmed with identifying information, and no executable nor configuration: - eth MAC address - board model (PCB version, BoM version) - board serial number Accidental modification would cause misidentification which could brick the board, so marking read-only seem like both a safe and non-constraining choice. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index f8648ee1785ac..d1f2289e529be 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -59,6 +59,16 @@ interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; + eeprom@54 { + compatible = "microchip,24c02", "atmel,24c02"; + reg = <0x54>; + vcc-supply = <&vdd_bpro>; + label = "board-id"; + pagesize = <16>; + read-only; + size = <256>; + }; + pmic@58 { compatible = "dlg,da9063"; reg = <0x58>; From cd29cc8ad2540a4f9a0a3e174394d39e648ef941 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:39 +0000 Subject: [PATCH 243/361] riscv: dts: sifive unmatched: Expose the PMIC sub-functions These sub-functions are available in the chip revision on this board, so expose them. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index d1f2289e529be..91b3e76b2bb2a 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -76,6 +76,18 @@ interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + onkey { + compatible = "dlg,da9063-onkey"; + }; + + rtc { + compatible = "dlg,da9063-rtc"; + }; + + wdt { + compatible = "dlg,da9063-watchdog"; + }; + regulators { vdd_bcore1: bcore1 { regulator-min-microvolt = <900000>; From ad931d9b3b2e21586de8e6b34346d0a30c13721d Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:41 +0000 Subject: [PATCH 244/361] riscv: dts: sifive unmatched: Fix regulator for board rev3 The existing values are rejected by the da9063 regulator driver, as they are unachievable with the declared chip setup (non-merged vcore and bmem are unable to provide the declared curent). Fix voltages to match rev3 schematics, which also matches their boot-up configuration within the chip's available precision. Declare bcore1/bcore2 and bmem/bio as merged. Set ldo09 and ldo10 as always-on as their consumers are not declared but exist. Drop ldo current limits as there is no current limit feature for these regulators in the DA9063. Fixes warnings like: DA9063_LDO3: Operation of current configuration missing Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- .../boot/dts/sifive/hifive-unmatched-a00.dts | 84 ++++++------------- 1 file changed, 24 insertions(+), 60 deletions(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 91b3e76b2bb2a..58de5a312fc99 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -89,47 +89,31 @@ }; regulators { - vdd_bcore1: bcore1 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; - regulator-always-on; - }; - - vdd_bcore2: bcore2 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; + vdd_bcore: bcores-merged { + regulator-min-microvolt = <1050000>; + regulator-max-microvolt = <1050000>; + regulator-min-microamp = <4800000>; + regulator-max-microamp = <4800000>; regulator-always-on; }; vdd_bpro: bpro { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <2500000>; - regulator-max-microamp = <2500000>; + regulator-min-microamp = <2400000>; + regulator-max-microamp = <2400000>; regulator-always-on; }; vdd_bperi: bperi { - regulator-min-microvolt = <1050000>; - regulator-max-microvolt = <1050000>; + regulator-min-microvolt = <1060000>; + regulator-max-microvolt = <1060000>; regulator-min-microamp = <1500000>; regulator-max-microamp = <1500000>; regulator-always-on; }; - vdd_bmem: bmem { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-min-microamp = <3000000>; - regulator-max-microamp = <3000000>; - regulator-always-on; - }; - - vdd_bio: bio { + vdd_bmem_bio: bmem-bio-merged { regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-min-microamp = <3000000>; @@ -140,86 +124,66 @@ vdd_ldo1: ldo1 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; regulator-always-on; }; vdd_ldo2: ldo2 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; regulator-always-on; }; vdd_ldo3: ldo3 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo4: ldo4 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; regulator-always-on; }; vdd_ldo5: ldo5 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo6: ldo6 { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-always-on; }; vdd_ldo7: ldo7 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo8: ldo8 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ld09: ldo9 { regulator-min-microvolt = <1050000>; regulator-max-microvolt = <1050000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-always-on; }; vdd_ldo10: ldo10 { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; + regulator-always-on; }; vdd_ldo11: ldo11 { regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; regulator-always-on; }; }; From f6f7fbb89bf8dc9132fde55cfe67483138eea880 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:42 +0000 Subject: [PATCH 245/361] riscv: dts: sifive unmatched: Link the tmp451 with its power supply Fixes the following probe warning: lm90 0-004c: Looking up vcc-supply from device tree lm90 0-004c: Looking up vcc-supply property in node /soc/i2c@10030000/temperature-sensor@4c failed lm90 0-004c: supply vcc not found, using dummy regulator Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 58de5a312fc99..6bfa1f24d3deb 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -55,6 +55,7 @@ temperature-sensor@4c { compatible = "ti,tmp451"; reg = <0x4c>; + vcc-supply = <&vdd_bpro>; interrupt-parent = <&gpio>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; From 8ffea2599f63fdbee968b894eab78170abf3ec2c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 17 Dec 2021 15:15:45 +0900 Subject: [PATCH 246/361] zonefs: add MODULE_ALIAS_FS Add MODULE_ALIAS_FS() to load the module automatically when you do "mount -t zonefs". Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable # 5.6+ Signed-off-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 259ee2bda4926..b76dfb310ab65 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1787,5 +1787,6 @@ static void __exit zonefs_exit(void) MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS("zonefs"); module_init(zonefs_init); module_exit(zonefs_exit); From bce472f90952cc8be03dded25c4aa109d27e5924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 17 Dec 2021 16:41:17 +0900 Subject: [PATCH 247/361] MAITAINERS: Change zonefs maintainer email address Update my email address from damien.lemoal@wdc.com to damien.lemoal@opensource.wdc.com. Signed-off-by: Damien Le Moal --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 13f9a84a617e3..d01ae22c55f81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21059,7 +21059,7 @@ S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c ZONEFS FILESYSTEM -M: Damien Le Moal +M: Damien Le Moal M: Naohiro Aota R: Johannes Thumshirn L: linux-fsdevel@vger.kernel.org From b62e3317b68d9c84301940ca8ca9c35a584111b2 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Thu, 16 Dec 2021 23:19:16 +0800 Subject: [PATCH 248/361] net: fix typo in a comment The double 'as' in a comment is repeated, thus it should be removed. Signed-off-by: Xiang wangx Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be5cb3360b944..6aadcc0ecb5b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1937,7 +1937,7 @@ enum netdev_ml_priv_type { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * - * @nested_level: Used as as a parameter of spin_lock_nested() of + * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. From 7202216a6f34d571a22274e729f841256bf8b1ef Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 25 Nov 2021 12:05:19 +0100 Subject: [PATCH 249/361] ARM: 9160/1: NOMMU: Reload __secondary_data after PROCINFO_INITFUNC __secondary_data used to reside in r7 around call to PROCINFO_INITFUNC. After commit 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") r7 is used as a scratch register, so we have to reload __secondary_data before we setup the stack pointer. Fixes: 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") Signed-off-by: Vladimir Murzin Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/head-nommu.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fadfee9e2b45e..950bef83339f5 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -114,6 +114,7 @@ ENTRY(secondary_startup) add r12, r12, r10 ret r12 1: bl __after_proc_init + ldr r7, __secondary_data @ reload r7 ldr sp, [r7, #12] @ set up the stack pointer ldr r0, [r7, #16] @ set up task pointer mov fp, #0 From 8536a5ef886005bc443c2da9b842d69fd3d7647f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Dec 2021 09:31:36 +0100 Subject: [PATCH 250/361] ARM: 9169/1: entry: fix Thumb2 bug in iWMMXt exception handling The Thumb2 version of the FP exception handling entry code treats the register holding the CP number (R8) differently, resulting in the iWMMXT CP number check to be incorrect. Fix this by unifying the ARM and Thumb2 code paths, and switch the order of the additions of the TI_USED_CP offset and the shifted CP index. Cc: Fixes: b86040a59feb ("Thumb-2: Implementation of the unified start-up and exceptions code") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index deff286eb5ea0..5cd057859fe90 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -596,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop From c4d936efa46d8ea183df16c0f3fa4423327da51d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Dec 2021 16:24:30 +0100 Subject: [PATCH 251/361] Revert "usb: early: convert to readl_poll_timeout_atomic()" This reverts commit 796eed4b2342c9d6b26c958e92af91253a2390e1. This change causes boot lockups when using "arlyprintk=xdbc" because ktime can not be used at this point in time in the boot process. Also, it is not needed for very small delays like this. Reported-by: Mathias Nyman Reported-by: Peter Zijlstra Cc: Jann Horn Cc: Chunfeng Yun Fixes: 796eed4b2342 ("usb: early: convert to readl_poll_timeout_atomic()") Link: https://lore.kernel.org/r/c2b5c9bb-1b75-bf56-3754-b5b18812d65e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/xhci-dbc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 933d77ad0a642..4502108069cd9 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -136,9 +135,17 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay) { u32 result; - return readl_poll_timeout_atomic(ptr, result, - ((result & mask) == done), - delay, wait); + /* Can not use readl_poll_timeout_atomic() for early boot things */ + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay); + wait -= delay; + } while (wait > 0); + + return -ETIMEDOUT; } static void __init xdbc_bios_handoff(void) From 544e737dea5ad1a457f25dbddf68761ff25e028b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2021 20:30:18 +0100 Subject: [PATCH 252/361] PM: sleep: Fix error handling in dpm_prepare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") forgot to update the while () loop termination condition to also break the loop if error is nonzero, which causes the loop to become infinite if device_prepare() returns an error for one device. Add the missing !error check. Fixes: 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") Signed-off-by: Rafael J. Wysocki Reported-by: Thomas Hellström Reviewed-by: Thomas Hellström Reviewed-by: Ulf Hansson Cc: All applicable --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f4d0c555de29b..04ea92cbd9cfd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1902,7 +1902,7 @@ int dpm_prepare(pm_message_t state) device_block_probing(); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_list)) { + while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); From afe8a3ba85ec2a6b6849367e25c06a2f8e0ddd05 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:06 +0100 Subject: [PATCH 253/361] ice: xsk: return xsk buffers back to pool when cleaning the ring Currently we only NULL the xdp_buff pointer in the internal SW ring but we never give it back to the xsk buffer pool. This means that buffers can be leaked out of the buff pool and never be used again. Add missing xsk_buff_free() call to the routine that is supposed to clean the entries that are left in the ring so that these buffers in the umem can be used by other sockets. Also, only go through the space that is actually left to be cleaned instead of a whole ring. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index bb9a808472988..8593717a755e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -811,14 +811,14 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) */ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { - u16 i; - - for (i = 0; i < rx_ring->count; i++) { - struct xdp_buff **xdp = &rx_ring->xdp_buf[i]; + u16 count_mask = rx_ring->count - 1; + u16 ntc = rx_ring->next_to_clean; + u16 ntu = rx_ring->next_to_use; - if (!xdp) - continue; + for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc]; + xsk_buff_free(*xdp); *xdp = NULL; } } From 617f3e1b588c802517c236087561c6bcb0b4afd6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:07 +0100 Subject: [PATCH 254/361] ice: xsk: allocate separate memory for XDP SW ring Currently, the zero-copy data path is reusing the memory region that was initially allocated for an array of struct ice_rx_buf for its own purposes. This is error prone as it is based on the ice_rx_buf struct always being the same size or bigger than what the zero-copy path needs. There can also be old values present in that array giving rise to errors when the zero-copy path uses it. Fix this by freeing the ice_rx_buf region and allocating a new array for the zero-copy path that has the right length and is initialized to zero. Fixes: 57f7f8b6bc0b ("ice: Use xdp_buf instead of rx_buf for xsk zero-copy") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 17 ++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 19 ++++++++----- drivers/net/ethernet/intel/ice/ice_xsk.c | 33 ++++++++++++----------- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1efc635cc0f5e..fafe020e46eec 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -6,6 +6,18 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" +static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) +{ + rx_ring->xdp_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->xdp_buf), GFP_KERNEL); + return !!rx_ring->xdp_buf; +} + +static bool ice_alloc_rx_buf(struct ice_rx_ring *rx_ring) +{ + rx_ring->rx_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); + return !!rx_ring->rx_buf; +} + /** * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI * @qs_cfg: gathered variables needed for PF->VSI queues assignment @@ -492,8 +504,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id); + kfree(ring->rx_buf); ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { + if (!ice_alloc_rx_buf_zc(ring)) + return -ENOMEM; xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); ring->rx_buf_len = @@ -508,6 +523,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { + if (!ice_alloc_rx_buf(ring)) + return -ENOMEM; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index bc3ba19dc88f8..dccf09eefc754 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -419,7 +419,10 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) } rx_skip_free: - memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); + if (rx_ring->xsk_pool) + memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf))); + else + memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf))); /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), @@ -446,8 +449,13 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); rx_ring->xdp_prog = NULL; - devm_kfree(rx_ring->dev, rx_ring->rx_buf); - rx_ring->rx_buf = NULL; + if (rx_ring->xsk_pool) { + kfree(rx_ring->xdp_buf); + rx_ring->xdp_buf = NULL; + } else { + kfree(rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + } if (rx_ring->desc) { size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), @@ -475,8 +483,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, - GFP_KERNEL); + kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; @@ -505,7 +512,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) return 0; err: - devm_kfree(dev, rx_ring->rx_buf); + kfree(rx_ring->rx_buf); rx_ring->rx_buf = NULL; return -ENOMEM; } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8593717a755e5..c124229d98fe4 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -12,6 +12,11 @@ #include "ice_txrx_lib.h" #include "ice_lib.h" +static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) +{ + return &rx_ring->xdp_buf[idx]; +} + /** * ice_qp_reset_stats - Resets all stats for rings of given index * @vsi: VSI that contains rings of interest @@ -372,7 +377,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) dma_addr_t dma; rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = &rx_ring->xdp_buf[ntu]; + xdp = ice_xdp_buf(rx_ring, ntu); nb_buffs = min_t(u16, count, rx_ring->count - ntu); nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); @@ -419,19 +424,18 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) /** * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer * @rx_ring: Rx ring - * @xdp_arr: Pointer to the SW ring of xdp_buff pointers + * @xdp: Pointer to XDP buffer * * This function allocates a new skb from a zero-copy Rx buffer. * * Returns the skb on success, NULL on failure. */ static struct sk_buff * -ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) +ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - struct xdp_buff *xdp = *xdp_arr; + unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; unsigned int metasize = xdp->data - xdp->data_meta; unsigned int datasize = xdp->data_end - xdp->data; - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; struct sk_buff *skb; skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, @@ -445,7 +449,6 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) skb_metadata_set(skb, metasize); xsk_buff_free(xdp); - *xdp_arr = NULL; return skb; } @@ -522,7 +525,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; - struct xdp_buff **xdp; + struct xdp_buff *xdp; struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; @@ -545,18 +548,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (!size) break; - xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean]; - xsk_buff_set_size(*xdp, size); - xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool); + xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); + xsk_buff_set_size(xdp, size); + xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, *xdp, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring); if (xdp_res) { if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) xdp_xmit |= xdp_res; else - xsk_buff_free(*xdp); + xsk_buff_free(xdp); - *xdp = NULL; total_rx_bytes += size; total_rx_packets++; cleaned_count++; @@ -816,10 +818,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) u16 ntu = rx_ring->next_to_use; for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { - struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc]; + struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc); - xsk_buff_free(*xdp); - *xdp = NULL; + xsk_buff_free(xdp); } } From 0708b6facb4d165ef22bccddf2dc3e1eb9a12d03 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Dec 2021 16:31:08 +0100 Subject: [PATCH 255/361] ice: remove dead store on XSK hotpath The 'if (ntu == rx_ring->count)' block in ice_alloc_rx_buffers_zc() was previously residing in the loop, but after introducing the batched interface it is used only to wrap-around the NTU descriptor, thus no more need to assign 'xdp'. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Signed-off-by: Alexander Lobakin Acked-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c124229d98fe4..27f5f64dcbd6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -397,7 +397,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = ICE_RX_DESC(rx_ring, 0); - xdp = rx_ring->xdp_buf; ntu = 0; } From 8b51a13c37c24c08e488bd58303cb437814f4454 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:09 +0100 Subject: [PATCH 256/361] ice: xsk: do not clear status_error0 for ntu + nb_buffs descriptor The descriptor that ntu is pointing at when we exit ice_alloc_rx_bufs_zc() should not have its corresponding DD bit cleared as descriptor is not allocated in there and it is not valid for HW usage. The allocation routine at the entry will fill the descriptor that ntu points to after it was set to ntu + nb_buffs on previous call. Even the spec says: "The tail pointer should be set to one descriptor beyond the last empty descriptor in host descriptor ring." Therefore, step away from clearing the status_error0 on ntu + nb_buffs descriptor. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Reported-by: Elza Mathew Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 27f5f64dcbd6d..ffa9a160766ae 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -395,13 +395,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) } ntu += nb_buffs; - if (ntu == rx_ring->count) { - rx_desc = ICE_RX_DESC(rx_ring, 0); + if (ntu == rx_ring->count) ntu = 0; - } - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.status_error0 = 0; ice_release_rx_desc(rx_ring, ntu); return count == nb_buffs; From 8bea15ab7485863d900982ee6a0ff6f78b339c77 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:10 +0100 Subject: [PATCH 257/361] ice: xsk: allow empty Rx descriptors on XSK ZC data path Commit ac6f733a7bd5 ("ice: allow empty Rx descriptors") stated that ice HW can produce empty descriptors that are valid and they should be processed. Add this support to xsk ZC path to avoid potential processing problems. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index ffa9a160766ae..c1491dc0675da 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -538,12 +538,18 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) */ dma_rmb(); + xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; - if (!size) - break; + if (!size) { + xdp->data = NULL; + xdp->data_end = NULL; + xdp->data_hard_start = NULL; + xdp->data_meta = NULL; + goto construct_skb; + } - xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); xsk_buff_set_size(xdp, size); xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); @@ -561,7 +567,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) ice_bump_ntc(rx_ring); continue; } - +construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { From dcbaf72aa4232a7aa5db5e483972a6fe4ba2b41c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:11 +0100 Subject: [PATCH 258/361] ice: xsk: fix cleaned_count setting Currently cleaned_count is initialized to ICE_DESC_UNUSED(rx_ring) and later on during the Rx processing it is incremented per each frame that driver consumed. This can result in excessive buffers requested from xsk pool based on that value. To address this, just drop cleaned_count and pass ICE_DESC_UNUSED(rx_ring) directly as a function argument to ice_alloc_rx_bufs_zc(). Idea is to ask for buffers as many as consumed. Let us also call ice_alloc_rx_bufs_zc unconditionally at the end of ice_clean_rx_irq_zc. This has been changed in that way for corresponding ice_clean_rx_irq, but not here. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c56dd17499031..b7b3bd4816f0d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -24,7 +24,6 @@ #define ICE_MAX_DATA_PER_TXD_ALIGNED \ (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD) -#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 /* Attempt to maximize the headroom available for incoming frames. We use a 2K diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c1491dc0675da..c895351b25e0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -505,7 +505,6 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; @@ -562,7 +561,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) total_rx_bytes += size; total_rx_packets++; - cleaned_count++; ice_bump_ntc(rx_ring); continue; @@ -575,7 +573,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) break; } - cleaned_count++; ice_bump_ntc(rx_ring); if (eth_skb_pad(skb)) { @@ -597,8 +594,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) ice_receive_skb(rx_ring, skb, vlan_tag); } - if (cleaned_count >= ICE_RX_BUF_WRITE) - failure = !ice_alloc_rx_bufs_zc(rx_ring, cleaned_count); + failure = !ice_alloc_rx_bufs_zc(rx_ring, ICE_DESC_UNUSED(rx_ring)); ice_finalize_xdp_rx(xdp_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); From 2b5160b12091285c5aca45980f100a9294af7b04 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:09 -0300 Subject: [PATCH 259/361] ipmi: bail out if init_srcu_struct fails In case, init_srcu_struct fails (because of memory allocation failure), we might proceed with the driver initialization despite srcu_struct not being entirely initialized. Fixes: 913a89f009d9 ("ipmi: Don't initialize anything in the core until something uses it") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-1-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c837d5416e0ee..84975b21fff23 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5392,7 +5392,9 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); From 75d70d76cb7b927cace2cb34265d68ebb3306b13 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:10 -0300 Subject: [PATCH 260/361] ipmi: fix initialization when workqueue allocation fails If the workqueue allocation fails, the driver is marked as not initialized, and timer and panic_notifier will be left registered. Instead of removing those when workqueue allocation fails, do the workqueue initialization before doing it, and cleanup srcu_struct if it fails. Fixes: 1d49eb91e86e ("ipmi: Move remove_work to dedicated workqueue") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: Ioanna Alifieraki Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-2-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 84975b21fff23..266c7bc58ddae 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5396,20 +5396,23 @@ static int ipmi_init_msghandler(void) if (rv) goto out; - timer_setup(&ipmi_timer, ipmi_timeout, 0); - mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); - - atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); if (!remove_work_wq) { pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); rv = -ENOMEM; - goto out; + goto out_wq; } + timer_setup(&ipmi_timer, ipmi_timeout, 0); + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); + + atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + initialized = true; +out_wq: + if (rv) + cleanup_srcu_struct(&ipmi_interfaces_srcu); out: mutex_unlock(&ipmi_interfaces_mutex); return rv; From 4e8c11b6b3f0b6a283e898344f154641eda94266 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 13 Dec 2021 21:57:27 +0800 Subject: [PATCH 261/361] timekeeping: Really make sure wall_to_monotonic isn't positive Even after commit e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") it is still possible to make wall_to_monotonic positive by running the following code: int main(void) { struct timespec time; clock_gettime(CLOCK_MONOTONIC, &time); time.tv_nsec = 0; clock_settime(CLOCK_REALTIME, &time); return 0; } The reason is that the second parameter of timespec64_compare(), ts_delta, may be unnormalized because the delta is calculated with an open coded substraction which causes the comparison of tv_sec to yield the wrong result: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -9, .tv_nsec = -900000000 } That makes timespec64_compare() claim that wall_to_monotonic < ts_delta, but actually the result should be wall_to_monotonic > ts_delta. After normalization, the result of timespec64_compare() is correct because the tv_sec comparison is not longer misleading: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -10, .tv_nsec = 100000000 } Use timespec64_sub() to ensure that ts_delta is normalized, which fixes the issue. Fixes: e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") Signed-off-by: Yu Liao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211213135727.1656662-1-liaoyu15@huawei.com --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b348749a9fc62..dcdcb85121e40 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1306,8 +1306,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; From b774302e885697dde027825f8de9beb985d037bd Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 8 Dec 2021 16:33:19 +0000 Subject: [PATCH 262/361] cifs: ignore resource_id while getting fscache super cookie We have a cyclic dependency between fscache super cookie and root inode cookie. The super cookie relies on tcon->resource_id, which gets populated from the root inode number. However, fetching the root inode initializes inode cookie as a child of super cookie, which is yet to be populated. resource_id is only used as auxdata to check the validity of super cookie. We can completely avoid setting resource_id to remove the circular dependency. Since vol creation time and vol serial numbers are used for auxdata, we should be fine. Additionally, there will be auxiliary data check for each inode cookie as well. Fixes: 5bf91ef03d98 ("cifs: wait for tcon resource_id before getting fscache super") CC: David Howells Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 7 +++++++ fs/cifs/inode.c | 13 ------------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 18448dbd762a8..1060164b984a7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3064,6 +3064,13 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); + /* + * The cookie is initialized from volume info returned above. + * Inside cifs_fscache_get_super_cookie it checks + * that we do not get super cookie twice. + */ + cifs_fscache_get_super_cookie(tcon); + out: mnt_ctx->server = server; mnt_ctx->ses = ses; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 96d083db17372..279622e4eb1c2 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1356,11 +1356,6 @@ struct inode *cifs_root_iget(struct super_block *sb) goto out; } -#ifdef CONFIG_CIFS_FSCACHE - /* populate tcon->resource_id */ - tcon->resource_id = CIFS_I(inode)->uniqueid; -#endif - if (rc && tcon->pipe) { cifs_dbg(FYI, "ipc connection - fake read inode\n"); spin_lock(&inode->i_lock); @@ -1375,14 +1370,6 @@ struct inode *cifs_root_iget(struct super_block *sb) iget_failed(inode); inode = ERR_PTR(rc); } - - /* - * The cookie is initialized from volume info returned above. - * Inside cifs_fscache_get_super_cookie it checks - * that we do not get super cookie twice. - */ - cifs_fscache_get_super_cookie(tcon); - out: kfree(path); free_xid(xid); From a31080899d5fdafcccf7f39dd214a814a2c82626 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Fri, 17 Dec 2021 15:20:22 -0300 Subject: [PATCH 263/361] cifs: sanitize multiple delimiters in prepath mount.cifs can pass a device with multiple delimiters in it. This will cause rename(2) to fail with ENOENT. V2: - Make sanitize_path more readable. - Fix multiple delimiters between UNC and prepath. - Avoid a memory leak if a bad user starts putting a lot of delimiters in the path on purpose. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2031200 Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Acked-by: Ronnie Sahlberg Signed-off-by: Thiago Rafael Becker Signed-off-by: Steve French --- fs/cifs/fs_context.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 6a179ae753c11..e3ed25dc6f3f6 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -434,6 +434,42 @@ int smb3_parse_opt(const char *options, const char *key, char **val) return rc; } +/* + * Remove duplicate path delimiters. Windows is supposed to do that + * but there are some bugs that prevent rename from working if there are + * multiple delimiters. + * + * Returns a sanitized duplicate of @path. The caller is responsible for + * cleaning up the original. + */ +#define IS_DELIM(c) ((c) == '/' || (c) == '\\') +static char *sanitize_path(char *path) +{ + char *cursor1 = path, *cursor2 = path; + + /* skip all prepended delimiters */ + while (IS_DELIM(*cursor1)) + cursor1++; + + /* copy the first letter */ + *cursor2 = *cursor1; + + /* copy the remainder... */ + while (*(cursor1++)) { + /* ... skipping all duplicated delimiters */ + if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2)) + continue; + *(++cursor2) = *cursor1; + } + + /* if the last character is a delimiter, skip it */ + if (IS_DELIM(*(cursor2 - 1))) + cursor2--; + + *(cursor2) = '\0'; + return kstrdup(path, GFP_KERNEL); +} + /* * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath * fields with the result. Returns 0 on success and an error otherwise @@ -493,7 +529,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!*pos) return 0; - ctx->prepath = kstrdup(pos, GFP_KERNEL); + ctx->prepath = sanitize_path(pos); if (!ctx->prepath) return -ENOMEM; From 83912d6d55be10d65b5268d1871168b9ebe1ec4b Mon Sep 17 00:00:00 2001 From: Marcos Del Sol Vives Date: Thu, 16 Dec 2021 11:37:22 +0100 Subject: [PATCH 264/361] ksmbd: disable SMB2_GLOBAL_CAP_ENCRYPTION for SMB 3.1.1 According to the official Microsoft MS-SMB2 document section 3.3.5.4, this flag should be used only for 3.0 and 3.0.2 dialects. Setting it for 3.1.1 is a violation of the specification. This causes my Windows 10 client to detect an anomaly in the negotiation, and disable encryption entirely despite being explicitly enabled in ksmbd, causing all data transfers to go in plain text. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Marcos Del Sol Vives Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 3 --- fs/ksmbd/smb2pdu.c | 25 +++++++++++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index 0a5d8450e835f..02a44d28bdafc 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -271,9 +271,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; - if (conn->cipher_type) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 125590d5e9402..b8b3a4c28b749 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -915,6 +915,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, } } +/** + * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption + * @conn: smb connection + * + * Return: true if connection should be encrypted, else false + */ +static bool smb3_encryption_negotiated(struct ksmbd_conn *conn) +{ + if (!conn->ops->generate_encryptionkey) + return false; + + /* + * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag. + * SMB 3.1.1 uses the cipher_type field. + */ + return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) || + conn->cipher_type; +} + static void decode_compress_ctxt(struct ksmbd_conn *conn, struct smb2_compression_capabilities_context *pneg_ctxt) { @@ -1469,8 +1488,7 @@ static int ntlm_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION && - conn->ops->generate_encryptionkey && + if (smb3_encryption_negotiated(conn) && !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { rc = conn->ops->generate_encryptionkey(sess); if (rc) { @@ -1559,8 +1577,7 @@ static int krb5_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) && - conn->ops->generate_encryptionkey) { + if (smb3_encryption_negotiated(conn)) { retval = conn->ops->generate_encryptionkey(sess); if (retval) { ksmbd_debug(SMB, From ec624fe740b416fb68d536b37fb8eef46f90b5c2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:33 +0200 Subject: [PATCH 265/361] net/sched: Extend qdisc control block with tc control block BPF layer extends the qdisc control block via struct bpf_skb_data_end and because of that there is no more room to add variables to the qdisc layer control block without going over the skb->cb size. Extend the qdisc control block with a tc control block, and move all tc related variables to there as a pre-step for extending the tc control block with additional members. Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 15 +++++++++++++++ include/net/sch_generic.h | 2 -- net/core/dev.c | 8 ++++---- net/sched/act_ct.c | 14 +++++++------- net/sched/cls_api.c | 6 ++++-- net/sched/cls_flower.c | 3 ++- net/sched/sch_frag.c | 3 ++- 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af2..05f18e81f3e87 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + bool post_ct; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72a..c70e6d2b2fdd6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -447,8 +447,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); diff --git a/net/core/dev.c b/net/core/dev.c index 2a352e668d103..c4708e2487fb6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { @@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 90866ae45573a..98e248b9c0b17 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; - struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; + u16 mru; /* Previously seen (loopback)? Ignore. */ ct = nf_ct_get(skb, &ctinfo); @@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); - cb = *qdisc_skb_cb(skb); + mru = tc_skb_cb(skb)->mru; if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IPCB(skb)->frag_max_size; + mru = IPCB(skb)->frag_max_size; } } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) @@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IP6CB(skb)->frag_max_size; + mru = IP6CB(skb)->frag_max_size; } #else err = -EOPNOTSUPP; @@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, } if (err != -EINPROGRESS) - *qdisc_skb_cb(skb) = cb; + tc_skb_cb(skb)->mru = mru; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); if (clear) { - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { nf_conntrack_put(&ct->ct_general); @@ -1048,7 +1048,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, out_push: skb_push_rcsum(skb, nh_ofs); - qdisc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->post_ct = true; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e54f0a42270c1..ff8a9383bf1c4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1617,12 +1617,14 @@ int tcf_classify(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; - ext->mru = qdisc_skb_cb(skb)->mru; - ext->post_ct = qdisc_skb_cb(skb)->post_ct; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; } return ret; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index aab13ba117672..9782b93db1b34 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -309,7 +310,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); - bool post_ct = qdisc_skb_cb(skb)->post_ct; + bool post_ct = tc_skb_cb(skb)->post_ct; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index 8c06381391d6f..5ded4c8672a64 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB #include #include +#include #include #include #include @@ -137,7 +138,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { - u16 mru = qdisc_skb_cb(skb)->mru; + u16 mru = tc_skb_cb(skb)->mru; int err; if (mru && skb->len > mru + skb->dev->hard_header_len) From 3849595866166b23bf6a0cb9ff87e06423167f67 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:34 +0200 Subject: [PATCH 266/361] net/sched: flow_dissector: Fix matching on zone id for invalid conns If ct rejects a flow, it removes the conntrack info from the skb. act_ct sets the post_ct variable so the dissector will see this case as an +tracked +invalid state, but the zone id is lost with the conntrack info. To restore the zone id on such cases, set the last executed zone, via the tc control block, when passing ct, and read it back in the dissector if there is no ct info on the skb (invalid connection). Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/net/pkt_sched.h | 1 + net/core/flow_dissector.c | 3 ++- net/sched/act_ct.c | 1 + net/sched/cls_flower.c | 3 ++- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8cb7e697d479..2ecf8cfd22231 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, - bool post_ct); + bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 05f18e81f3e87..9e71691c491b7 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -198,6 +198,7 @@ struct tc_skb_cb { u16 mru; bool post_ct; + u16 zone; /* Only valid if post_ct = true */ }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3255f57f5131a..1b094c481f1d0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -238,7 +238,7 @@ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, - size_t mapsize, bool post_ct) + size_t mapsize, bool post_ct, u16 zone) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct flow_dissector_key_ct *key; @@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, if (!ct) { key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_INVALID; + key->ct_zone = zone; return; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 98e248b9c0b17..ab3591408419f 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1049,6 +1049,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, skb_push_rcsum(skb, nh_ofs); tc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->zone = p->zone; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9782b93db1b34..ef54ed3958742 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -311,6 +311,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_fl_head *head = rcu_dereference_bh(tp->root); bool post_ct = tc_skb_cb(skb)->post_ct; + u16 zone = tc_skb_cb(skb)->zone; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; @@ -328,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, ARRAY_SIZE(fl_ct_info_to_flower_map), - post_ct); + post_ct, zone); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); skb_flow_dissect(skb, &mask->dissector, &skb_key, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); From 635d448a1cce4b4ebee52b351052c70434fa90ea Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:35 +0200 Subject: [PATCH 267/361] net: openvswitch: Fix matching zone id for invalid conns arriving from tc Zone id is not restored if we passed ct and ct rejected the connection, as there is no ct info on the skb. Save the zone from tc skb cb to tc skb extension and pass it on to ovs, use that info to restore the zone id for invalid connections. Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/openvswitch/flow.c | 8 +++++++- net/sched/cls_api.c | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ecf8cfd22231..4507d77d6941f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -286,6 +286,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + __u16 zone; bool post_ct; }; #endif diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9713035b89e3a..6d262d9aa10ea 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "conntrack.h" #include "datapath.h" @@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif bool post_ct = false; int res, err; + u16 zone = 0; /* Extract metadata from packet. */ if (tun_info) { @@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; + zone = post_ct ? tc_ext->zone : 0; } else { key->recirc_id = 0; } @@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif err = key_extract(skb, key); - if (!err) + if (!err) { ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ + if (post_ct && !skb_get_nfct(skb)) + key->ct_zone = zone; + } return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff8a9383bf1c4..35c74bdde848e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb, ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; + ext->zone = cb->zone; } return ret; From 1488fc204568f707fe2a42a913788c00a95af30e Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Fri, 17 Dec 2021 01:07:40 +0100 Subject: [PATCH 268/361] net: lantiq_xrx200: increase buffer reservation If the user sets a lower mtu on the CPU port than on the switch, then DMA inserts a few more bytes into the buffer than expected. In the worst case, it may exceed the size of the buffer. The experiments showed that the buffer should be a multiple of the burst length value. This patch rounds the length of the rx buffer upwards and fixes this bug. The reservation of FCS space in the buffer has been removed as PMAC strips the FCS. Fixes: 998ac358019e ("net: lantiq: add support for jumbo frames") Reported-by: Thomas Nixon Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/lantiq_xrx200.c | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 0da09ea819809..96bd6f2b21ed9 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -71,6 +71,8 @@ struct xrx200_priv { struct xrx200_chan chan_tx; struct xrx200_chan chan_rx; + u16 rx_buf_size; + struct net_device *net_dev; struct device *dev; @@ -97,6 +99,16 @@ static void xrx200_pmac_mask(struct xrx200_priv *priv, u32 clear, u32 set, xrx200_pmac_w32(priv, val, offset); } +static int xrx200_max_frame_len(int mtu) +{ + return VLAN_ETH_HLEN + mtu; +} + +static int xrx200_buffer_size(int mtu) +{ + return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN); +} + /* drop all the packets from the DMA ring */ static void xrx200_flush_dma(struct xrx200_chan *ch) { @@ -109,8 +121,7 @@ static void xrx200_flush_dma(struct xrx200_chan *ch) break; desc->ctl = LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | - (ch->priv->net_dev->mtu + VLAN_ETH_HLEN + - ETH_FCS_LEN); + ch->priv->rx_buf_size; ch->dma.desc++; ch->dma.desc %= LTQ_DESC_NUM; } @@ -158,21 +169,21 @@ static int xrx200_close(struct net_device *net_dev) static int xrx200_alloc_skb(struct xrx200_chan *ch) { - int len = ch->priv->net_dev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; struct sk_buff *skb = ch->skb[ch->dma.desc]; + struct xrx200_priv *priv = ch->priv; dma_addr_t mapping; int ret = 0; - ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev, - len); + ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev, + priv->rx_buf_size); if (!ch->skb[ch->dma.desc]) { ret = -ENOMEM; goto skip; } - mapping = dma_map_single(ch->priv->dev, ch->skb[ch->dma.desc]->data, - len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { + mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data, + priv->rx_buf_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); ch->skb[ch->dma.desc] = skb; ret = -ENOMEM; @@ -184,7 +195,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch) wmb(); skip: ch->dma.desc_base[ch->dma.desc].ctl = - LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | len; + LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | priv->rx_buf_size; return ret; } @@ -356,6 +367,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) int ret = 0; net_dev->mtu = new_mtu; + priv->rx_buf_size = xrx200_buffer_size(new_mtu); if (new_mtu <= old_mtu) return ret; @@ -375,6 +387,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) ret = xrx200_alloc_skb(ch_rx); if (ret) { net_dev->mtu = old_mtu; + priv->rx_buf_size = xrx200_buffer_size(old_mtu); break; } dev_kfree_skb_any(skb); @@ -505,7 +518,8 @@ static int xrx200_probe(struct platform_device *pdev) net_dev->netdev_ops = &xrx200_netdev_ops; SET_NETDEV_DEV(net_dev, dev); net_dev->min_mtu = ETH_ZLEN; - net_dev->max_mtu = XRX200_DMA_DATA_LEN - VLAN_ETH_HLEN - ETH_FCS_LEN; + net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0); + priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN); /* load the memory ranges */ priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); From f845fe5819efc4111c456c102f15db6d9ed3406e Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 17 Dec 2021 10:00:59 +0700 Subject: [PATCH 269/361] Revert "tipc: use consistent GFP flags" This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb. The tipc_aead_init() function can be calling from an interrupt routine. This allocation might sleep with GFP_KERNEL flag, hence the following BUG is reported. [ 17.657509] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/3 [ 17.664093] preempt_count: 302, expected: 0 [ 17.665619] RCU nest depth: 2, expected: 0 [ 17.667163] Preemption disabled at: [ 17.667165] [<0000000000000000>] 0x0 [ 17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: G W 5.16.0-rc4+ #1 [ 17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 17.675540] Call Trace: [ 17.676285] [ 17.676913] dump_stack_lvl+0x34/0x44 [ 17.678033] __might_resched.cold+0xd6/0x10f [ 17.679311] kmem_cache_alloc_trace+0x14d/0x220 [ 17.680663] tipc_crypto_start+0x4a/0x2b0 [tipc] [ 17.682146] ? kmem_cache_alloc_trace+0xd3/0x220 [ 17.683545] tipc_node_create+0x2f0/0x790 [tipc] [ 17.684956] tipc_node_check_dest+0x72/0x680 [tipc] [ 17.686706] ? ___cache_free+0x31/0x350 [ 17.688008] ? skb_release_data+0x128/0x140 [ 17.689431] tipc_disc_rcv+0x479/0x510 [tipc] [ 17.690904] tipc_rcv+0x71c/0x730 [tipc] [ 17.692219] ? __netif_receive_skb_core+0xb7/0xf60 [ 17.693856] tipc_l2_rcv_msg+0x5e/0x90 [tipc] [ 17.695333] __netif_receive_skb_list_core+0x20b/0x260 [ 17.697072] netif_receive_skb_list_internal+0x1bf/0x2e0 [ 17.698870] ? dev_gro_receive+0x4c2/0x680 [ 17.700255] napi_complete_done+0x6f/0x180 [ 17.701657] virtnet_poll+0x29c/0x42e [virtio_net] [ 17.703262] __napi_poll+0x2c/0x170 [ 17.704429] net_rx_action+0x22f/0x280 [ 17.705706] __do_softirq+0xfd/0x30a [ 17.706921] common_interrupt+0xa4/0xc0 [ 17.708206] [ 17.708922] [ 17.709651] asm_common_interrupt+0x1e/0x40 [ 17.711078] RIP: 0010:default_idle+0x18/0x20 Fixes: 86c3a3e964d9 ("tipc: use consistent GFP flags") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20211217030059.5947-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/crypto.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index b4d9419a015b1..d293614d5fc65 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, return -EEXIST; /* Allocate a new AEAD */ - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (unlikely(!tmp)) return -ENOMEM; @@ -1474,7 +1474,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, return -EEXIST; /* Allocate crypto */ - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return -ENOMEM; @@ -1488,7 +1488,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, } /* Allocate statistic structure */ - c->stats = alloc_percpu(struct tipc_crypto_stats); + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { if (c->wq) destroy_workqueue(c->wq); @@ -2461,7 +2461,7 @@ static void tipc_crypto_work_tx(struct work_struct *work) } /* Lets duplicate it first */ - skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL); + skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); rcu_read_unlock(); /* Now, generate new key, initiate & distribute it */ From 8b681bd7c301c423fbe97a6b23388a2180ff04ca Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Thu, 16 Dec 2021 19:07:36 +0200 Subject: [PATCH 270/361] net: marvell: prestera: fix incorrect return of port_find In case, when some ports is in list and we don't find requested - we return last iterator state and not return NULL as expected. Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Signed-off-by: Yevhen Orlov Link: https://lore.kernel.org/r/20211216170736.8851-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/prestera/prestera_main.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 4369a3ffad45b..6c24375ad9cf8 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -54,12 +54,14 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid) struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, u32 dev_id, u32 hw_id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->dev_id == dev_id && port->hw_id == hw_id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->dev_id == dev_id && tmp->hw_id == hw_id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); @@ -68,12 +70,14 @@ struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->id == id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->id == id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); From 2efc2256febf214e7b2bdaa21fe6c3c3146acdcb Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Thu, 16 Dec 2021 19:17:14 +0200 Subject: [PATCH 271/361] net: marvell: prestera: fix incorrect structure access In line: upper = info->upper_dev; We access upper_dev field, which is related only for particular events (e.g. event == NETDEV_CHANGEUPPER). So, this line cause invalid memory access for another events, when ptr is not netdev_notifier_changeupper_info. The KASAN logs are as follows: [ 30.123165] BUG: KASAN: stack-out-of-bounds in prestera_netdev_port_event.constprop.0+0x68/0x538 [prestera] [ 30.133336] Read of size 8 at addr ffff80000cf772b0 by task udevd/778 [ 30.139866] [ 30.141398] CPU: 0 PID: 778 Comm: udevd Not tainted 5.16.0-rc3 #6 [ 30.147588] Hardware name: DNI AmazonGo1 A7040 board (DT) [ 30.153056] Call trace: [ 30.155547] dump_backtrace+0x0/0x2c0 [ 30.159320] show_stack+0x18/0x30 [ 30.162729] dump_stack_lvl+0x68/0x84 [ 30.166491] print_address_description.constprop.0+0x74/0x2b8 [ 30.172346] kasan_report+0x1e8/0x250 [ 30.176102] __asan_load8+0x98/0xe0 [ 30.179682] prestera_netdev_port_event.constprop.0+0x68/0x538 [prestera] [ 30.186847] prestera_netdev_event_handler+0x1b4/0x1c0 [prestera] [ 30.193313] raw_notifier_call_chain+0x74/0xa0 [ 30.197860] call_netdevice_notifiers_info+0x68/0xc0 [ 30.202924] register_netdevice+0x3cc/0x760 [ 30.207190] register_netdev+0x24/0x50 [ 30.211015] prestera_device_register+0x8a0/0xba0 [prestera] Fixes: 3d5048cc54bd ("net: marvell: prestera: move netdev topology validation to prestera_main") Signed-off-by: Yevhen Orlov Link: https://lore.kernel.org/r/20211216171714.11341-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/prestera/prestera_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 6c24375ad9cf8..c687dc9aa9737 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -768,23 +768,27 @@ static int prestera_netdev_port_event(struct net_device *lower, struct net_device *dev, unsigned long event, void *ptr) { - struct netdev_notifier_changeupper_info *info = ptr; + struct netdev_notifier_info *info = ptr; + struct netdev_notifier_changeupper_info *cu_info; struct prestera_port *port = netdev_priv(dev); struct netlink_ext_ack *extack; struct net_device *upper; - extack = netdev_notifier_info_to_extack(&info->info); - upper = info->upper_dev; + extack = netdev_notifier_info_to_extack(info); + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); switch (event) { case NETDEV_PRECHANGEUPPER: + upper = cu_info->upper_dev; if (!netif_is_bridge_master(upper) && !netif_is_lag_master(upper)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } - if (!info->linking) + if (!cu_info->linking) break; if (netdev_has_any_upper_dev(upper)) { @@ -793,7 +797,7 @@ static int prestera_netdev_port_event(struct net_device *lower, } if (netif_is_lag_master(upper) && - !prestera_lag_master_check(upper, info->upper_info, extack)) + !prestera_lag_master_check(upper, cu_info->upper_info, extack)) return -EOPNOTSUPP; if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -809,14 +813,15 @@ static int prestera_netdev_port_event(struct net_device *lower, break; case NETDEV_CHANGEUPPER: + upper = cu_info->upper_dev; if (netif_is_bridge_master(upper)) { - if (info->linking) + if (cu_info->linking) return prestera_bridge_port_join(upper, port, extack); else prestera_bridge_port_leave(upper, port); } else if (netif_is_lag_master(upper)) { - if (info->linking) + if (cu_info->linking) return prestera_lag_port_add(port, upper); else prestera_lag_port_del(port); From 158b515f703e75e7d68289bf4d98c664e1d632df Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Thu, 16 Dec 2021 13:25:32 -0500 Subject: [PATCH 272/361] tun: avoid double free in tun_free_netdev Avoid double free in tun_free_netdev() by moving the dev->tstats and tun->security allocs to a new ndo_init routine (tun_net_init()) that will be called by register_netdevice(). ndo_init is paired with the desctructor (tun_free_netdev()), so if there's an error in register_netdevice() the destructor will handle the frees. BUG: KASAN: double-free or invalid-free in selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 CPU: 0 PID: 25750 Comm: syz-executor416 Not tainted 5.16.0-rc2-syzk #1 Hardware name: Red Hat KVM, BIOS Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x89/0xb5 lib/dump_stack.c:106 print_address_description.constprop.9+0x28/0x160 mm/kasan/report.c:247 kasan_report_invalid_free+0x55/0x80 mm/kasan/report.c:372 ____kasan_slab_free mm/kasan/common.c:346 [inline] __kasan_slab_free+0x107/0x120 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook mm/slub.c:1749 [inline] slab_free mm/slub.c:3513 [inline] kfree+0xac/0x2d0 mm/slub.c:4561 selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 security_tun_dev_free_security+0x4f/0x90 security/security.c:2342 tun_free_netdev+0xe6/0x150 drivers/net/tun.c:2215 netdev_run_todo+0x4df/0x840 net/core/dev.c:10627 rtnl_unlock+0x13/0x20 net/core/rtnetlink.c:112 __tun_chr_ioctl+0x80c/0x2870 drivers/net/tun.c:3302 tun_chr_ioctl+0x2f/0x40 drivers/net/tun.c:3311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x19d/0x220 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: syzkaller Signed-off-by: George Kennedy Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/r/1639679132-19884-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 115 ++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1572878c34031..45a67e72a02c6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -209,6 +209,9 @@ struct tun_struct { struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; + /* init args */ + struct file *file; + struct ifreq *ifr; }; struct veth { @@ -216,6 +219,9 @@ struct veth { __be16 h_vlan_TCI; }; +static void tun_flow_init(struct tun_struct *tun); +static void tun_flow_uninit(struct tun_struct *tun); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -953,6 +959,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +static int tun_net_init(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct ifreq *ifr = tun->ifr; + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + spin_lock_init(&tun->lock); + + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) { + free_percpu(dev->tstats); + return err; + } + + tun_flow_init(tun); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->features = dev->hw_features | NETIF_F_LLTX; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); + err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS, false); + if (err < 0) { + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); + free_percpu(dev->tstats); + return err; + } + return 0; +} + /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { @@ -1169,6 +1218,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1252,6 +1302,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1292,7 +1343,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. */ -static void tun_net_init(struct net_device *dev) +static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -2206,11 +2257,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(dev->tstats); - /* We clear tstats so that tun_set_iff() can tell if - * tun_free_netdev() has been called from register_netdevice(). - */ - dev->tstats = NULL; - tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); @@ -2716,41 +2762,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) { - err = -ENOMEM; - goto err_free_dev; - } - - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); + tun->ifr = ifr; + tun->file = file; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); - if (err < 0) - goto err_free_flow; + tun_net_initialize(dev); err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; + if (err < 0) { + free_netdev(dev); + return err; + } /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration. */ @@ -2767,24 +2788,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - -err_detach: - tun_detach_all(dev); - /* We are here because register_netdevice() has failed. - * If register_netdevice() already called tun_free_netdev() - * while dealing with the error, dev->stats has been cleared. - */ - if (!dev->tstats) - goto err_free_dev; - -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(dev->tstats); -err_free_dev: - free_netdev(dev); - return err; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) From 8f556a326c93213927e683fc32bbf5be1b62540a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 17 Dec 2021 15:42:07 +0800 Subject: [PATCH 273/361] locking/rtmutex: Fix incorrect condition in rtmutex_spin_on_owner() Optimistic spinning needs to be terminated when the spinning waiter is not longer the top waiter on the lock, but the condition is negated. It terminates if the waiter is the top waiter, which is defeating the whole purpose. Fixes: c3123c431447 ("locking/rtmutex: Dont dereference waiter lockless") Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211217074207.77425-1-qiang1.zhang@intel.com --- kernel/locking/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 0c6a48dfcecb3..1f25a4d7de273 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1380,7 +1380,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * - the VCPU on which owner runs is preempted */ if (!owner->on_cpu || need_resched() || - rt_mutex_waiter_is_top_waiter(lock, waiter) || + !rt_mutex_waiter_is_top_waiter(lock, waiter) || vcpu_is_preempted(task_cpu(owner))) { res = false; break; From 0a515a06c5ebfa46fee3ac519e418f801e718da4 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 12 Dec 2021 06:25:02 +0000 Subject: [PATCH 274/361] perf expr: Fix missing check for return value of hashmap__new() The hashmap__new() function may return ERR_PTR(-ENOMEM) when malloc() fails, add IS_ERR() checking for ctx->ids. Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211212062504.25841-1-linmq006@gmail.com [ s/kfree()/free()/ and add missing linux/err.h include ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 1d532b9fed29c..254601060b392 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -12,6 +12,7 @@ #include "expr-bison.h" #include "expr-flex.h" #include "smt.h" +#include #include #include #include @@ -299,6 +300,10 @@ struct expr_parse_ctx *expr__ctx_new(void) return NULL; ctx->ids = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(ctx->ids)) { + free(ctx); + return NULL; + } ctx->runtime = 0; return ctx; From 0c8e32fe48f549eef27c8c6b0a63530f83c3a643 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:28 +0200 Subject: [PATCH 275/361] perf inject: Fix segfault due to close without open The fixed commit attempts to close inject.output even if it was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 48 iofclose.c: No such file or directory. (gdb) bt #0 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 #1 0x0000557fc7b74f92 in perf_data__close (data=data@entry=0x7ffcdafa6578) at util/data.c:376 #2 0x0000557fc7a6b807 in cmd_inject (argc=, argv=) at builtin-inject.c:1085 #3 0x0000557fc7ac4783 in run_builtin (p=0x557fc8074878 , argc=4, argv=0x7ffcdafb6a60) at perf.c:313 #4 0x0000557fc7a25d5c in handle_internal_command (argv=, argc=) at perf.c:365 #5 run_argv (argcp=, argv=) at perf.c:409 #6 main (argc=4, argv=0x7ffcdafb6a60) at perf.c:539 (gdb) Fixes: 02e6246f5364d526 ("perf inject: Close inject.output on exit") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9d6306cc14ea..af70f1c72052a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -1078,7 +1078,8 @@ int cmd_inject(int argc, const char **argv) zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); out_close_output: - perf_data__close(&inject.output); + if (!inject.in_place_update) + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } From c271a55b0c6029fed0cac909fa57999a11467132 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:29 +0200 Subject: [PATCH 276/361] perf inject: Fix segfault due to perf_data__fd() without open The fixed commit attempts to get the output file descriptor even if the file was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. __GI___fileno (fp=0x0) at fileno.c:35 35 fileno.c: No such file or directory. (gdb) bt #0 __GI___fileno (fp=0x0) at fileno.c:35 #1 0x00005621e48dd987 in perf_data__fd (data=0x7fff4c68bd08) at util/data.h:72 #2 perf_data__fd (data=0x7fff4c68bd08) at util/data.h:69 #3 cmd_inject (argc=, argv=0x7fff4c69c1f0) at builtin-inject.c:1017 #4 0x00005621e4936783 in run_builtin (p=0x5621e4ee6878 , argc=4, argv=0x7fff4c69c1f0) at perf.c:313 #5 0x00005621e4897d5c in handle_internal_command (argv=, argc=) at perf.c:365 #6 run_argv (argcp=, argv=) at perf.c:409 #7 main (argc=4, argv=0x7fff4c69c1f0) at perf.c:539 (gdb) Fixes: 0ae03893623dd1dd ("perf tools: Pass a fd to perf_file_header__read_pipe()") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index af70f1c72052a..409b721666cba 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -755,12 +755,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str, return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; } +static int output_fd(struct perf_inject *inject) +{ + return inject->in_place_update ? -1 : perf_data__fd(&inject->output); +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data *data_out = &inject->output; - int fd = inject->in_place_update ? -1 : perf_data__fd(data_out); + int fd = output_fd(inject); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -1015,7 +1019,7 @@ int cmd_inject(int argc, const char **argv) } inject.session = __perf_session__new(&data, repipe, - perf_data__fd(&inject.output), + output_fd(&inject), &inject.tool); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); From b2f37aead1b82a770c48b5d583f35ec22aabb61e Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 17 Dec 2021 10:13:56 +0800 Subject: [PATCH 277/361] hamradio: improve the incomplete fix to avoid NPD The previous commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") reorder the kfree operations and unregister_netdev operation to prevent UAF. This commit improves the previous one by also deferring the nullify of the ax->tty pointer. Otherwise, a NULL pointer dereference bug occurs. Partial of the stack trace is shown below. BUG: kernel NULL pointer dereference, address: 0000000000000538 RIP: 0010:ax_xmit+0x1f9/0x400 ... Call Trace: dev_hard_start_xmit+0xec/0x320 sch_direct_xmit+0xea/0x240 __qdisc_run+0x166/0x5c0 __dev_queue_xmit+0x2c7/0xaf0 ax25_std_establish_data_link+0x59/0x60 ax25_connect+0x3a0/0x500 ? security_socket_connect+0x2b/0x40 __sys_connect+0x96/0xc0 ? __hrtimer_init+0xc0/0xc0 ? common_nsleep+0x2e/0x50 ? switch_fpu_return+0x139/0x1a0 __x64_sys_connect+0x11/0x20 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The crash point is shown as below static void ax_encaps(...) { ... set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); // ax->tty = NULL! ... } By placing the nullify action after the unregister_netdev, the ax->tty pointer won't be assigned as NULL net_device framework layer is well synchronized. Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 7da2bb8a443c0..edde9c3ae12b9 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -794,14 +794,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - ax->tty = NULL; - unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); + ax->tty = NULL; + free_netdev(ax->dev); } From 1ade48d0c27d5da1ccf4b583d8c5fc8b534a3ac8 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 17 Dec 2021 10:29:41 +0800 Subject: [PATCH 278/361] ax25: NPD bug when detaching AX25 device The existing cleanup routine implementation is not well synchronized with the syscall routine. When a device is detaching, below race could occur. static int ax25_sendmsg(...) { ... lock_sock() ax25 = sk_to_ax25(sk); if (ax25->ax25_dev == NULL) // CHECK ... ax25_queue_xmit(skb, ax25->ax25_dev->dev); // USE ... } static void ax25_kill_by_device(...) { ... if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; ... } Other syscall functions like ax25_getsockopt, ax25_getname, ax25_info_show also suffer from similar races. To fix them, this patch introduce lock_sock() into ax25_kill_by_device in order to guarantee that the nullify action in cleanup routine cannot proceed when another socket request is pending. Signed-off-by: Hanjie Wu Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2f34bbdde0e8f..cfca99e295b80 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -85,8 +85,10 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); + lock_sock(s->sk); + s->ax25_dev = NULL; + release_sock(s->sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); From 60ec7fcfe76892a1479afab51ff17a4281923156 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 17 Dec 2021 17:39:11 +0800 Subject: [PATCH 279/361] qlcnic: potential dereference null pointer of rx_queue->page_ring The return value of kcalloc() needs to be checked. To avoid dereference of null pointer in case of the failure of alloc. Therefore, it might be better to change the return type of qlcnic_sriov_alloc_vlans() and return -ENOMEM when alloc fails and return 0 the others. Also, qlcnic_sriov_set_guest_vlan_mode() and __qlcnic_pci_sriov_enable() should deal with the return value of qlcnic_sriov_alloc_vlans(). Fixes: 154d0c810c53 ("qlcnic: VLAN enhancement for 84XX adapters") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 12 +++++++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 7160b42f51ddd..d0111cb3b40e1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -201,7 +201,7 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *, struct qlcnic_info *, u16); int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8); void qlcnic_sriov_free_vlans(struct qlcnic_adapter *); -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); bool qlcnic_sriov_check_any_vlan(struct qlcnic_vf_info *); void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *, struct qlcnic_vf_info *, u16); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index dd03be3fc82a9..42a44c97572ae 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -432,7 +432,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; - int i, num_vlans; + int i, num_vlans, ret; u16 *vlans; if (sriov->allowed_vlans) @@ -443,7 +443,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n", sriov->num_allowed_vlans); - qlcnic_sriov_alloc_vlans(adapter); + ret = qlcnic_sriov_alloc_vlans(adapter); + if (ret) + return ret; if (!sriov->any_vlan) return 0; @@ -2154,7 +2156,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) return err; } -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_vf_info *vf; @@ -2164,7 +2166,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); + if (!vf->sriov_vlans) + return -ENOMEM; } + + return 0; } void qlcnic_sriov_free_vlans(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 447720b93e5ab..e90fa97c0ae6c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -597,7 +597,9 @@ static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, if (err) goto del_flr_queue; - qlcnic_sriov_alloc_vlans(adapter); + err = qlcnic_sriov_alloc_vlans(adapter); + if (err) + goto del_flr_queue; return err; From 53b1119a6e5028b125f431a0116ba73510d82a72 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Dec 2021 11:12:11 -0500 Subject: [PATCH 280/361] NFSD: Fix READDIR buffer overflow If a client sends a READDIR count argument that is too small (say, zero), then the buffer size calculation in the new init_dirlist helper functions results in an underflow, allowing the XDR stream functions to write beyond the actual buffer. This calculation has always been suspect. NFSD has never sanity- checked the READDIR count argument, but the old entry encoders managed the problem correctly. With the commits below, entry encoding changed, exposing the underflow to the pointer arithmetic in xdr_reserve_space(). Modern NFS clients attempt to retrieve as much data as possible for each READDIR request. Also, we have no unit tests that exercise the behavior of READDIR at the lower bound of @count values. Thus this case was missed during testing. Reported-by: Anatoly Trosinenko Fixes: f5dcccd647da ("NFSD: Update the NFSv2 READDIR entry encoder to use struct xdr_stream") Fixes: 7f87fc2d34d4 ("NFSD: Update NFSv3 READDIR entry encoders to use struct xdr_stream") Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 11 ++++------- fs/nfsd/nfsproc.c | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4418517f6f120..15dac36ca852e 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -438,22 +438,19 @@ nfsd3_proc_link(struct svc_rqst *rqstp) static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd3_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, svc_max_payload(rqstp)); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; - while (count > 0) { - rqstp->rq_next_page++; - count -= PAGE_SIZE; - } + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* This is xdr_init_encode(), but it assumes that * the head kvec has already been consumed. */ @@ -462,7 +459,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index eea5b59b6a6ca..de282f3273c50 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -556,17 +556,17 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, PAGE_SIZE); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = count - sizeof(__be32) * 2; + buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; @@ -577,7 +577,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } From 9a5875f14b0e3a13ae314883f1bb72b7f31fac07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Mon, 18 Oct 2021 13:22:01 +0200 Subject: [PATCH 281/361] gpio: dln2: Fix interrupts when replugging the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When replugging the device the following message shows up: gpio gpiochip2: (dln2): detected irqchip that is shared with multiple gpiochips: please fix the driver. This also has the effect that interrupts won't work. The same problem would also show up if multiple devices where plugged in. Fix this by allocating the irq_chip data structure per instance like other drivers do. I don't know when this problem appeared, but it is present in 5.10. Cc: # 5.10+ Cc: Daniel Baluta Signed-off-by: Noralf Trønnes Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-dln2.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 026903e3ef543..08b9e2cf4f2d6 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -46,6 +46,7 @@ struct dln2_gpio { struct platform_device *pdev; struct gpio_chip gpio; + struct irq_chip irqchip; /* * Cache pin direction to save us one transfer, since the hardware has @@ -383,15 +384,6 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd) mutex_unlock(&dln2->irq_lock); } -static struct irq_chip dln2_gpio_irqchip = { - .name = "dln2-irq", - .irq_mask = dln2_irq_mask, - .irq_unmask = dln2_irq_unmask, - .irq_set_type = dln2_irq_set_type, - .irq_bus_lock = dln2_irq_bus_lock, - .irq_bus_sync_unlock = dln2_irq_bus_unlock, -}; - static void dln2_gpio_event(struct platform_device *pdev, u16 echo, const void *data, int len) { @@ -473,8 +465,15 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.direction_output = dln2_gpio_direction_output; dln2->gpio.set_config = dln2_gpio_set_config; + dln2->irqchip.name = "dln2-irq", + dln2->irqchip.irq_mask = dln2_irq_mask, + dln2->irqchip.irq_unmask = dln2_irq_unmask, + dln2->irqchip.irq_set_type = dln2_irq_set_type, + dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock, + dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock, + girq = &dln2->gpio.irq; - girq->chip = &dln2_gpio_irqchip; + girq->chip = &dln2->irqchip; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From 87959fa16cfbcf76245c11559db1940069621274 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Dec 2021 07:58:44 -0700 Subject: [PATCH 282/361] Revert "block: reduce kblockd_mod_delayed_work_on() CPU consumption" This reverts commit cb2ac2912a9ca7d3d26291c511939a41361d2d83. Alex and the kernel test robot report that this causes a significant performance regression with BFQ. I can reproduce that result, so let's revert this one as we're close to -rc6 and we there's no point in trying to rush a fix. Link: https://lore.kernel.org/linux-block/1639853092.524jxfaem2.none@localhost/ Link: https://lore.kernel.org/lkml/20211219141852.GH14057@xsang-OptiPlex-9020/ Reported-by: Alex Xu (Hello71) Reported-by: kernel test robot Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c1833f95cb972..1378d084c770f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,8 +1484,6 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - if (!delay) - return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From 1aa2abb33a419090c7c87d4ae842a6347078ee12 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 16 Dec 2021 17:52:13 +0100 Subject: [PATCH 283/361] KVM: x86: Drop guest CPUID check for host initiated writes to MSR_IA32_PERF_CAPABILITIES The ability to write to MSR_IA32_PERF_CAPABILITIES from the host should not depend on guest visible CPUID entries, even if just to allow creating/restoring guest MSRs and CPUIDs in any sequence. Fixes: 27461da31089 ("KVM: x86/pmu: Support full width counting") Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20211216165213.338923-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0cf1082455dfd..9a2972fdae82b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3413,7 +3413,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) return 1; - if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent)) + if (kvm_get_msr_feature(&msr_ent)) return 1; if (data & ~msr_ent.data) return 1; From 0b091a43d704997789c6d812b02167c8f5f9f061 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 16 Dec 2021 17:52:12 +0100 Subject: [PATCH 284/361] KVM: selftests: vmx_pmu_msrs_test: Drop tests mangling guest visible CPUIDs Host initiated writes to MSR_IA32_PERF_CAPABILITIES should not depend on guest visible CPUIDs and (incorrect) KVM logic implementing it is about to change. Also, KVM_SET_CPUID{,2} after KVM_RUN is now forbidden and causes test to fail. Reported-by: kernel test robot Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN") Signed-off-by: Vitaly Kuznetsov Message-Id: <20211216165213.338923-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/vmx_pmu_msrs_test.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c index 23051d84b9078..2454a1f2ca0c2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c @@ -110,22 +110,5 @@ int main(int argc, char *argv[]) ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - /* testcase 4, set capabilities when we don't have PDCM bit */ - entry_1_0->ecx &= ~X86_FEATURE_PDCM; - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - - /* testcase 5, set capabilities when we don't have PMU version bits */ - entry_1_0->ecx |= X86_FEATURE_PDCM; - eax.split.version_id = 0; - entry_1_0->ecx = eax.full; - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); - TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - - vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); - kvm_vm_free(vm); } From 18c841e1f4112d3fb742aca3429e84117fcb1e1c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 9 Dec 2021 06:05:46 +0000 Subject: [PATCH 285/361] KVM: x86: Retry page fault if MMU reload is pending and root has no sp Play nice with a NULL shadow page when checking for an obsolete root in the page fault handler by flagging the page fault as stale if there's no shadow page associated with the root and KVM_REQ_MMU_RELOAD is pending. Invalidating memslots, which is the only case where _all_ roots need to be reloaded, requests all vCPUs to reload their MMUs while holding mmu_lock for lock. The "special" roots, e.g. pae_root when KVM uses PAE paging, are not backed by a shadow page. Running with TDP disabled or with nested NPT explodes spectaculary due to dereferencing a NULL shadow page pointer. Skip the KVM_REQ_MMU_RELOAD check if there is a valid shadow page for the root. Zapping shadow pages in response to guest activity, e.g. when the guest frees a PGD, can trigger KVM_REQ_MMU_RELOAD even if the current vCPU isn't using the affected root. I.e. KVM_REQ_MMU_RELOAD can be seen with a completely valid root shadow page. This is a bit of a moot point as KVM currently unloads all roots on KVM_REQ_MMU_RELOAD, but that will be cleaned up in the future. Fixes: a955cad84cda ("KVM: x86/mmu: Retry page fault if root is invalidated by memslot update") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20211209060552.2956723-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e2e1d012df226..fcdf3f8bb59a6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3987,7 +3987,21 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int mmu_seq) { - if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa))) + struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root_hpa); + + /* Special roots, e.g. pae_root, are not backed by shadow pages. */ + if (sp && is_obsolete_sp(vcpu->kvm, sp)) + return true; + + /* + * Roots without an associated shadow page are considered invalid if + * there is a pending request to free obsolete roots. The request is + * only a hint that the current root _may_ be obsolete and needs to be + * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a + * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs + * to reload even if no vCPU is actively using the root. + */ + if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu)) return true; return fault->slot && From 57690554abe135fee81d6ac33cc94d75a7e224bb Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Thu, 16 Dec 2021 00:08:56 +0000 Subject: [PATCH 286/361] x86/pkey: Fix undefined behaviour with PKRU_WD_BIT Both __pkru_allows_write() and arch_set_user_pkey_access() shift PKRU_WD_BIT (a signed constant) by up to 30 bits, hitting the sign bit. Use unsigned constants instead. Clearly pkey 15 has not been used in combination with UBSAN yet. Noticed by code inspection only. I can't actually provoke the compiler into generating incorrect logic as far as this shift is concerned. [ dhansen: add stable@ tag, plus minor changelog massaging, For anyone doing backports, these #defines were in arch/x86/include/asm/pgtable.h before 784a46618f6. ] Fixes: 33a709b25a76 ("mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys") Signed-off-by: Andrew Cooper Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211216000856.4480-1-andrew.cooper3@citrix.com --- arch/x86/include/asm/pkru.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 4cd49afa0ca4b..74f0a2d34ffdd 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -4,8 +4,8 @@ #include -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS From a7904a538933c525096ca2ccde1e60d0ee62c08e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Dec 2021 14:14:33 -0800 Subject: [PATCH 287/361] Linux 5.16-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 765115c99655f..d85f1ff79f5ca 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Gobble Gobble # *DOCUMENTATION* From 4ebfee2bbc1a9c343dd50565ba5ae249fac32267 Mon Sep 17 00:00:00 2001 From: Johnny Chuang Date: Mon, 20 Dec 2021 00:28:45 -0800 Subject: [PATCH 288/361] Input: elants_i2c - do not check Remark ID on eKTH3900/eKTH5312 The eKTH3900/eKTH5312 series do not support the firmware update rules of Remark ID. Exclude these two series from checking it when updating the firmware in touch controllers. Signed-off-by: Johnny Chuang Link: https://lore.kernel.org/r/1639619603-20616-1-git-send-email-johnny.chuang.emc@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 46 +++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 7e13a66a8a95c..879a4d984c907 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -117,6 +117,19 @@ #define ELAN_POWERON_DELAY_USEC 500 #define ELAN_RESET_DELAY_MSEC 20 +/* FW boot code version */ +#define BC_VER_H_BYTE_FOR_EKTH3900x1_I2C 0x72 +#define BC_VER_H_BYTE_FOR_EKTH3900x2_I2C 0x82 +#define BC_VER_H_BYTE_FOR_EKTH3900x3_I2C 0x92 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C 0x6D +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C 0x6E +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C 0x77 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C 0x78 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB 0x67 +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB 0x68 +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB 0x74 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB 0x75 + enum elants_chip_id { EKTH3500, EKTF3624, @@ -736,6 +749,37 @@ static int elants_i2c_validate_remark_id(struct elants_data *ts, return 0; } +static bool elants_i2c_should_check_remark_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 bootcode_version = ts->iap_version; + bool check; + + /* I2C eKTH3900 and eKTH5312 are NOT support Remark ID */ + if ((bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x3_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB)) { + dev_dbg(&client->dev, + "eKTH3900/eKTH5312(0x%02x) are not support remark id\n", + bootcode_version); + check = false; + } else if (bootcode_version >= 0x60) { + check = true; + } else { + check = false; + } + + return check; +} + static int elants_i2c_do_update_firmware(struct i2c_client *client, const struct firmware *fw, bool force) @@ -749,7 +793,7 @@ static int elants_i2c_do_update_firmware(struct i2c_client *client, u16 send_id; int page, n_fw_pages; int error; - bool check_remark_id = ts->iap_version >= 0x60; + bool check_remark_id = elants_i2c_should_check_remark_id(ts); /* Recovery mode detection! */ if (force) { From 9fb12fe5b93b94b9e607509ba461e17f4cc6a264 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 17 Dec 2021 07:49:34 -0500 Subject: [PATCH 289/361] KVM: x86: remove PMU FIXED_CTR3 from msrs_to_save_all The fixed counter 3 is used for the Topdown metrics, which hasn't been enabled for KVM guests. Userspace accessing to it will fail as it's not included in get_fixed_pmc(). This breaks KVM selftests on ICX+ machines, which have this counter. To reproduce it on ICX+ machines, ./state_test reports: ==== Test Assertion Failure ==== lib/x86_64/processor.c:1078: r == nmsrs pid=4564 tid=4564 - Argument list too long 1 0x000000000040b1b9: vcpu_save_state at processor.c:1077 2 0x0000000000402478: main at state_test.c:209 (discriminator 6) 3 0x00007fbe21ed5f92: ?? ??:0 4 0x000000000040264d: _start at ??:? Unexpected result from KVM_GET_MSRS, r: 17 (failed MSR was 0x30c) With this patch, it works well. Signed-off-by: Wei Wang Message-Id: <20211217124934.32893-1-wei.w.wang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a2972fdae82b..d490b83d640cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1331,7 +1331,7 @@ static const u32 msrs_to_save_all[] = { MSR_IA32_UMWAIT_CONTROL, MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, From 66c915d09b942fb3b2b0cb2f56562180901fba17 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 3 Dec 2021 15:15:54 +0100 Subject: [PATCH 290/361] mmc: core: Disable card detect during shutdown It's seems prone to problems by allowing card detect and its corresponding mmc_rescan() work to run, during platform shutdown. For example, we may end up turning off the power while initializing a card, which potentially could damage it. To avoid this scenario, let's add ->shutdown_pre() callback for the mmc host class device and then turn of the card detect from there. Reported-by: Al Cooper Suggested-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211203141555.105351-1-ulf.hansson@linaro.org --- drivers/mmc/core/core.c | 7 ++++++- drivers/mmc/core/core.h | 1 + drivers/mmc/core/host.c | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 240c5af793dce..368f10405e132 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2264,7 +2264,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } -void mmc_stop_host(struct mmc_host *host) +void __mmc_stop_host(struct mmc_host *host) { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); @@ -2273,6 +2273,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); +} + +void mmc_stop_host(struct mmc_host *host) +{ + __mmc_stop_host(host); /* clear pm flags now and let card drivers set them as needed */ host->pm_flags = 0; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 7931a4f0137d2..f5f3f623ea492 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -70,6 +70,7 @@ static inline void mmc_delay(unsigned int ms) void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); +void __mmc_stop_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); void _mmc_detect_change(struct mmc_host *host, unsigned long delay, diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index d4683b1d263fd..cf140f4ec8643 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -80,9 +80,18 @@ static void mmc_host_classdev_release(struct device *dev) kfree(host); } +static int mmc_host_classdev_shutdown(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + __mmc_stop_host(host); + return 0; +} + static struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, + .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; From f89b548ca66be7500dcd92ee8e61590f7d08ac91 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 19 Dec 2021 16:34:41 +0100 Subject: [PATCH 291/361] mmc: meson-mx-sdhc: Set MANUAL_STOP for multi-block SDIO commands The vendor driver implements special handling for multi-block SD_IO_RW_EXTENDED (and SD_IO_RW_DIRECT) commands which have data attached to them. It sets the MANUAL_STOP bit in the MESON_SDHC_MISC register for these commands. In all other cases this bit is cleared. Here we omit SD_IO_RW_DIRECT since that command never has any data attached to it. This fixes SDIO wifi using the brcmfmac driver which reported the following error without this change on a Netxeon S82 board using a Meson8 (S802) SoC: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_sdiod_ramrw: membytes transfer failed brcmf_sdio_download_code_file: error -110 on writing 219557 membytes at 0x00000000 brcmf_sdio_download_firmware: dongle image file download failed And with this change: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_c_process_clm_blob: no clm_blob available (err=-2), device may have limited channels available brcmf_c_preinit_dcmds: Firmware: BCM43362/1 wl0: Apr 22 2013 14:50:00 version 5.90.195.89.6 FWID 01-b30a427d Fixes: e4bf1b0970ef96 ("mmc: host: meson-mx-sdhc: new driver for the Amlogic Meson SDHC host") Signed-off-by: Martin Blumenstingl Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211219153442.463863-2-martin.blumenstingl@googlemail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-mx-sdhc-mmc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c index 7cd9c0ec2fcfe..8fdd0bbbfa21f 100644 --- a/drivers/mmc/host/meson-mx-sdhc-mmc.c +++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c @@ -135,6 +135,7 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_mx_sdhc_host *host = mmc_priv(mmc); + bool manual_stop = false; u32 ictl, send; int pack_len; @@ -172,12 +173,27 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, else /* software flush: */ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK; + + /* + * Mimic the logic from the vendor driver where (only) + * SD_IO_RW_EXTENDED commands with more than one block set the + * MESON_SDHC_MISC_MANUAL_STOP bit. This fixes the firmware + * download in the brcmfmac driver for a BCM43362/1 card. + * Without this sdio_memcpy_toio() (with a size of 219557 + * bytes) times out if MESON_SDHC_MISC_MANUAL_STOP is not set. + */ + manual_stop = cmd->data->blocks > 1 && + cmd->opcode == SD_IO_RW_EXTENDED; } else { pack_len = 0; ictl |= MESON_SDHC_ICTL_RESP_OK; } + regmap_update_bits(host->regmap, MESON_SDHC_MISC, + MESON_SDHC_MISC_MANUAL_STOP, + manual_stop ? MESON_SDHC_MISC_MANUAL_STOP : 0); + if (cmd->opcode == MMC_STOP_TRANSMISSION) send |= MESON_SDHC_SEND_DATA_STOP; From 93a2207c254ca102ebbdae47b00f19bbfbfa7ecd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 20 Dec 2021 10:51:20 +0100 Subject: [PATCH 292/361] HID: holtek: fix mouse probing An overlook from the previous commit: we don't even parse or start the device, meaning that the device is not presented to user space. Fixes: 93020953d0fa ("HID: check for valid USB device for many HID drivers") Cc: stable@vger.kernel.org Link: https://bugs.archlinux.org/task/73048 Link: https://bugzilla.kernel.org/show_bug.cgi?id=215341 Link: https://lore.kernel.org/r/e4efbf13-bd8d-0370-629b-6c80c0044b15@leemhuis.info/ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-holtek-mouse.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index b7172c48ef9f0..7c907939bfae1 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -65,8 +65,23 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, static int holtek_mouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { + int ret; + if (!hid_is_usb(hdev)) return -EINVAL; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "hid parse failed: %d\n", ret); + return ret; + } + + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (ret) { + hid_err(hdev, "hw start failed: %d\n", ret); + return ret; + } + return 0; } From 13251ce1dd9bb525da2becb9b26fdfb94ca58659 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 16:36:05 +0800 Subject: [PATCH 293/361] HID: potential dereference of null pointer The return value of devm_kzalloc() needs to be checked. To avoid hdev->dev->driver_data to be null in case of the failure of alloc. Fixes: 14c9c014babe ("HID: add vivaldi HID driver") Cc: stable@vger.kernel.org Signed-off-by: Jiasheng Jiang Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211215083605.117638-1-jiasheng@iscas.ac.cn --- drivers/hid/hid-vivaldi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c index cd7ada48b1d9f..72957a9f71170 100644 --- a/drivers/hid/hid-vivaldi.c +++ b/drivers/hid/hid-vivaldi.c @@ -57,6 +57,9 @@ static int vivaldi_probe(struct hid_device *hdev, int ret; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); From 87a270625a89fc841f1a7e21aae6176543d8385c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Dec 2021 10:22:40 +0100 Subject: [PATCH 294/361] mac80211: fix locking in ieee80211_start_ap error path We need to hold the local->mtx to release the channel context, as even encoded by the lockdep_assert_held() there. Fix it. Cc: stable@vger.kernel.org Fixes: 295b02c4be74 ("mac80211: Add FILS discovery support") Reported-and-tested-by: syzbot+11c342e5e30e9539cabd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20211220090836.cee3d59a1915.I36bba9b79dc2ff4d57c3c7aa30dff9a003fe8c5c@changeid Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bd3d3195097fa..2d0dd69f9753c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1264,7 +1264,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return 0; error: + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); + return err; } From 662f11d55ffd02933e1bd275d732b97eddccf870 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 17 Dec 2021 12:42:31 -0500 Subject: [PATCH 295/361] docs: networking: dpaa2: Fix DPNI header The DPNI object should get its own header, like the rest of the objects. Fixes: 60b91319a349 ("staging: fsl-mc: Convert documentation to rst format") Signed-off-by: Sean Anderson Signed-off-by: David S. Miller --- .../device_drivers/ethernet/freescale/dpaa2/overview.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst index d638b5a8aadd4..199647729251e 100644 --- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst @@ -183,6 +183,7 @@ PHY and allows physical transmission and reception of Ethernet frames. IRQ config, enable, reset DPNI (Datapath Network Interface) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Contains TX/RX queues, network interface configuration, and RX buffer pool configuration mechanisms. The TX/RX queues are in memory and are identified by queue number. From 75a2f31520095600f650597c0ac41f48b5ba0068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 19 Dec 2021 19:03:39 +0200 Subject: [PATCH 296/361] phonet/pep: refuse to enable an unbound pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ioctl() implicitly assumed that the socket was already bound to a valid local socket name, i.e. Phonet object. If the socket was not bound, two separate problems would occur: 1) We'd send an pipe enablement request with an invalid source object. 2) Later socket calls could BUG on the socket unexpectedly being connected yet not bound to a valid object. Reported-by: syzbot+2dc91e7fc3dea88b1e8a@syzkaller.appspotmail.com Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b4f90afb0638b..65d463ad87707 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -947,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) ret = -EBUSY; else if (sk->sk_state == TCP_ESTABLISHED) ret = -EISCONN; + else if (!pn->pn_sk.sobject) + ret = -EADDRNOTAVAIL; else ret = pep_sock_enable(sk, NULL, 0); release_sock(sk); From 64d16aca3d4f130f35bbf1120e15f58a62f743d5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:54 -0800 Subject: [PATCH 297/361] drm/i915/guc: Use correct context lock when callig clr_context_registered s/ce/cn/ when grabbing guc_state.lock before calling clr_context_registered. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-2-matthew.brost@intel.com (cherry picked from commit b25db8c782ad7ae80d4cea2a09c222f4f8980bb9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c48557dfa04c4..c50039a5ba1e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1664,9 +1664,9 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) list_del_init(&cn->guc_id.link); ce->guc_id = cn->guc_id; - spin_lock(&ce->guc_state.lock); + spin_lock(&cn->guc_state.lock); clr_context_registered(cn); - spin_unlock(&ce->guc_state.lock); + spin_unlock(&cn->guc_state.lock); set_context_guc_id_invalid(cn); From 7807bf28fe02a76bf112916c6b9194f282f5e43c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:55 -0800 Subject: [PATCH 298/361] drm/i915/guc: Only assign guc_id.id when stealing guc_id Previously assigned whole guc_id structure (list, spin lock) which is incorrect, only assign the guc_id.id. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-3-matthew.brost@intel.com (cherry picked from commit 939d8e9c87e704fd5437e2c8b80929591fe540eb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c50039a5ba1e6..302e9ff0602cc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1662,7 +1662,7 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_parent(cn)); list_del_init(&cn->guc_id.link); - ce->guc_id = cn->guc_id; + ce->guc_id.id = cn->guc_id.id; spin_lock(&cn->guc_state.lock); clr_context_registered(cn); From 3a0f64de479cae75effb630a2e0a237ca0d0623c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 14 Dec 2021 03:35:28 +0000 Subject: [PATCH 299/361] KVM: x86/mmu: Don't advance iterator after restart due to yielding After dropping mmu_lock in the TDP MMU, restart the iterator during tdp_iter_next() and do not advance the iterator. Advancing the iterator results in skipping the top-level SPTE and all its children, which is fatal if any of the skipped SPTEs were not visited before yielding. When zapping all SPTEs, i.e. when min_level == root_level, restarting the iter and then invoking tdp_iter_next() is always fatal if the current gfn has as a valid SPTE, as advancing the iterator results in try_step_side() skipping the current gfn, which wasn't visited before yielding. Sprinkle WARNs on iter->yielded being true in various helpers that are often used in conjunction with yielding, and tag the helper with __must_check to reduce the probabily of improper usage. Failing to zap a top-level SPTE manifests in one of two ways. If a valid SPTE is skipped by both kvm_tdp_mmu_zap_all() and kvm_tdp_mmu_put_root(), the shadow page will be leaked and KVM will WARN accordingly. WARNING: CPU: 1 PID: 3509 at arch/x86/kvm/mmu/tdp_mmu.c:46 [kvm] RIP: 0010:kvm_mmu_uninit_tdp_mmu+0x3e/0x50 [kvm] Call Trace: kvm_arch_destroy_vm+0x130/0x1b0 [kvm] kvm_destroy_vm+0x162/0x2a0 [kvm] kvm_vcpu_release+0x34/0x60 [kvm] __fput+0x82/0x240 task_work_run+0x5c/0x90 do_exit+0x364/0xa10 ? futex_unqueue+0x38/0x60 do_group_exit+0x33/0xa0 get_signal+0x155/0x850 arch_do_signal_or_restart+0xed/0x750 exit_to_user_mode_prepare+0xc5/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae If kvm_tdp_mmu_zap_all() skips a gfn/SPTE but that SPTE is then zapped by kvm_tdp_mmu_put_root(), KVM triggers a use-after-free in the form of marking a struct page as dirty/accessed after it has been put back on the free list. This directly triggers a WARN due to encountering a page with page_count() == 0, but it can also lead to data corruption and additional errors in the kernel. WARNING: CPU: 7 PID: 1995658 at arch/x86/kvm/../../../virt/kvm/kvm_main.c:171 RIP: 0010:kvm_is_zone_device_pfn.part.0+0x9e/0xd0 [kvm] Call Trace: kvm_set_pfn_dirty+0x120/0x1d0 [kvm] __handle_changed_spte+0x92e/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] zap_gfn_range+0x549/0x620 [kvm] kvm_tdp_mmu_put_root+0x1b6/0x270 [kvm] mmu_free_root_page+0x219/0x2c0 [kvm] kvm_mmu_free_roots+0x1b4/0x4e0 [kvm] kvm_mmu_unload+0x1c/0xa0 [kvm] kvm_arch_destroy_vm+0x1f2/0x5c0 [kvm] kvm_put_kvm+0x3b1/0x8b0 [kvm] kvm_vcpu_release+0x4e/0x70 [kvm] __fput+0x1f7/0x8c0 task_work_run+0xf8/0x1a0 do_exit+0x97b/0x2230 do_group_exit+0xda/0x2a0 get_signal+0x3be/0x1e50 arch_do_signal_or_restart+0x244/0x17f0 exit_to_user_mode_prepare+0xcb/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Note, the underlying bug existed even before commit 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed") moved calls to tdp_mmu_iter_cond_resched() to the beginning of loops, as KVM could still incorrectly advance past a top-level entry when yielding on a lower-level entry. But with respect to leaking shadow pages, the bug was introduced by yielding before processing the current gfn. Alternatively, tdp_mmu_iter_cond_resched() could simply fall through, or callers could jump to their "retry" label. The downside of that approach is that tdp_mmu_iter_cond_resched() _must_ be called before anything else in the loop, and there's no easy way to enfornce that requirement. Ideally, KVM would handling the cond_resched() fully within the iterator macro (the code is actually quite clean) and avoid this entire class of bugs, but that is extremely difficult do while also supporting yielding after tdp_mmu_set_spte_atomic() fails. Yielding after failing to set a SPTE is very desirable as the "owner" of the REMOVED_SPTE isn't strictly bounded, e.g. if it's zapping a high-level shadow page, the REMOVED_SPTE may block operations on the SPTE for a significant amount of time. Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed") Reported-by: Ignat Korchagin Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211214033528.123268-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_iter.c | 6 ++++++ arch/x86/kvm/mmu/tdp_iter.h | 6 ++++++ arch/x86/kvm/mmu/tdp_mmu.c | 29 ++++++++++++++++------------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index b3ed302c1a359..caa96c270b954 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level) */ void tdp_iter_restart(struct tdp_iter *iter) { + iter->yielded = false; iter->yielded_gfn = iter->next_last_level_gfn; iter->level = iter->root_level; @@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter) */ void tdp_iter_next(struct tdp_iter *iter) { + if (iter->yielded) { + tdp_iter_restart(iter); + return; + } + if (try_step_down(iter)) return; diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index b1748b988d3ae..e19cabbcb65c8 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -45,6 +45,12 @@ struct tdp_iter { * iterator walks off the end of the paging structure. */ bool valid; + /* + * True if KVM dropped mmu_lock and yielded in the middle of a walk, in + * which case tdp_iter_next() needs to restart the walk at the root + * level instead of advancing to the next entry. + */ + bool yielded; }; /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 1db8496259add..1beb4ca905609 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -502,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte) { + WARN_ON_ONCE(iter->yielded); + lockdep_assert_held_read(&kvm->mmu_lock); /* @@ -575,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte, bool record_acc_track, bool record_dirty_log) { + WARN_ON_ONCE(iter->yielded); + lockdep_assert_held_write(&kvm->mmu_lock); /* @@ -640,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, * If this function should yield and flush is set, it will perform a remote * TLB flush before yielding. * - * If this function yields, it will also reset the tdp_iter's walk over the - * paging structure and the calling function should skip to the next - * iteration to allow the iterator to continue its traversal from the - * paging structure root. + * If this function yields, iter->yielded is set and the caller must skip to + * the next iteration, where tdp_iter_next() will reset the tdp_iter's walk + * over the paging structures to allow the iterator to continue its traversal + * from the paging structure root. * - * Return true if this function yielded and the iterator's traversal was reset. - * Return false if a yield was not needed. + * Returns true if this function yielded. */ -static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, - struct tdp_iter *iter, bool flush, - bool shared) +static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, + struct tdp_iter *iter, + bool flush, bool shared) { + WARN_ON(iter->yielded); + /* Ensure forward progress has been made before yielding. */ if (iter->next_last_level_gfn == iter->yielded_gfn) return false; @@ -671,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, WARN_ON(iter->gfn > iter->next_last_level_gfn); - tdp_iter_restart(iter); - - return true; + iter->yielded = true; } - return false; + return iter->yielded; } /* From c5063551bfcae4e48fec890b7bf369598b77526b Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Thu, 9 Dec 2021 07:52:57 -0800 Subject: [PATCH 300/361] KVM: x86: Always set kvm_run->if_flag The kvm_run struct's if_flag is a part of the userspace/kernel API. The SEV-ES patches failed to set this flag because it's no longer needed by QEMU (according to the comment in the source code). However, other hypervisors may make use of this flag. Therefore, set the flag for guests with encrypted registers (i.e., with guest_state_protected set). Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") Signed-off-by: Marc Orr Message-Id: <20211209155257.128747-1-marcorr@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 21 ++++++++++++--------- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 9 +-------- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index cefe1d81e2e8b..9e50da3ed01a3 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) KVM_X86_OP(set_rflags) +KVM_X86_OP(get_if_flag) KVM_X86_OP(tlb_flush_all) KVM_X86_OP(tlb_flush_current) KVM_X86_OP_NULL(tlb_remote_flush) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2164b9f4c7b0f..555f4de47ef29 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1349,6 +1349,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + bool (*get_if_flag)(struct kvm_vcpu *vcpu); void (*tlb_flush_all)(struct kvm_vcpu *vcpu); void (*tlb_flush_current)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d0f68d11ec70b..5151efa424acb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1585,6 +1585,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } +static bool svm_get_if_flag(struct kvm_vcpu *vcpu) +{ + struct vmcb *vmcb = to_svm(vcpu)->vmcb; + + return sev_es_guest(vcpu->kvm) + ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK + : kvm_get_rflags(vcpu) & X86_EFLAGS_IF; +} + static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -3568,14 +3577,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (sev_es_guest(vcpu->kvm)) { - /* - * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask - * bit to determine the state of the IF flag. - */ - if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) - return true; - } else if (is_guest_mode(vcpu)) { + if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF) @@ -3586,7 +3588,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (nested_exit_on_intr(svm)) return false; } else { - if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + if (!svm_get_if_flag(vcpu)) return true; } @@ -4621,6 +4623,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .get_if_flag = svm_get_if_flag, .tlb_flush_all = svm_flush_tlb, .tlb_flush_current = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5aadad3e73675..9f7604cbba41d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1363,6 +1363,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmx->emulation_required = vmx_emulation_required(vcpu); } +static bool vmx_get_if_flag(struct kvm_vcpu *vcpu) +{ + return vmx_get_rflags(vcpu) & X86_EFLAGS_IF; +} + u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -7579,6 +7584,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .get_if_flag = vmx_get_if_flag, .tlb_flush_all = vmx_flush_tlb_all, .tlb_flush_current = vmx_flush_tlb_current, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d490b83d640cf..e50e97ac44084 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9001,14 +9001,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - /* - * if_flag is obsolete and useless, so do not bother - * setting it for SEV-ES guests. Userspace can just - * use kvm_run->ready_for_interrupt_injection. - */ - kvm_run->if_flag = !vcpu->arch.guest_state_protected - && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; - + kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu); kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); From 577e022b7b41854911dcfb03678d8d2b930e8a3f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 14 Dec 2021 16:18:42 +0100 Subject: [PATCH 301/361] selftests: KVM: Fix non-x86 compiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempting to compile on a non-x86 architecture fails with include/kvm_util.h: In function ‘vm_compute_max_gfn’: include/kvm_util.h:79:21: error: dereferencing pointer to incomplete type ‘struct kvm_vm’ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; ^~ This is because the declaration of struct kvm_vm is in lib/kvm_util_internal.h as an effort to make it private to the test lib code. We can still provide arch specific functions, though, by making the generic function symbols weak. Do that to fix the compile error. Fixes: c8cc43c1eae2 ("selftests: KVM: avoid failures due to reserved HyperTransport region") Cc: stable@vger.kernel.org Signed-off-by: Andrew Jones Message-Id: <20211214151842.848314-1-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 10 +--------- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index da2b702da71a4..2d62edc49d67f 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -71,15 +71,6 @@ enum vm_guest_mode { #endif -#if defined(__x86_64__) -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -#else -static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm) -{ - return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; -} -#endif - #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) @@ -330,6 +321,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm); unsigned int vm_get_page_size(struct kvm_vm *vm); unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); uint64_t vm_get_max_gfn(struct kvm_vm *vm); int vm_get_fd(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index daf6fdb217a76..53d2b5d04b829 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2328,6 +2328,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm) return vm->page_shift; } +unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm) +{ + return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; +} + uint64_t vm_get_max_gfn(struct kvm_vm *vm) { return vm->max_gfn; From a80dfc025924024d2c61a4c1b8ef62b2fce76a04 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:03 +0000 Subject: [PATCH 302/361] KVM: VMX: Always clear vmx->fail on emulation_required Revert a relatively recent change that set vmx->fail if the vCPU is in L2 and emulation_required is true, as that behavior is completely bogus. Setting vmx->fail and synthesizing a VM-Exit is contradictory and wrong: (a) it's impossible to have both a VM-Fail and VM-Exit (b) vmcs.EXIT_REASON is not modified on VM-Fail (c) emulation_required refers to guest state and guest state checks are always VM-Exits, not VM-Fails. For KVM specifically, emulation_required is handled before nested exits in __vmx_handle_exit(), thus setting vmx->fail has no immediate effect, i.e. KVM calls into handle_invalid_guest_state() and vmx->fail is ignored. Setting vmx->fail can ultimately result in a WARN in nested_vmx_vmexit() firing when tearing down the VM as KVM never expects vmx->fail to be set when L2 is active, KVM always reflects those errors into L1. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 21158 at arch/x86/kvm/vmx/nested.c:4548 nested_vmx_vmexit+0x16bd/0x17e0 arch/x86/kvm/vmx/nested.c:4547 Modules linked in: CPU: 0 PID: 21158 Comm: syz-executor.1 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:nested_vmx_vmexit+0x16bd/0x17e0 arch/x86/kvm/vmx/nested.c:4547 Code: <0f> 0b e9 2e f8 ff ff e8 57 b3 5d 00 0f 0b e9 00 f1 ff ff 89 e9 80 Call Trace: vmx_leave_nested arch/x86/kvm/vmx/nested.c:6220 [inline] nested_vmx_free_vcpu+0x83/0xc0 arch/x86/kvm/vmx/nested.c:330 vmx_free_vcpu+0x11f/0x2a0 arch/x86/kvm/vmx/vmx.c:6799 kvm_arch_vcpu_destroy+0x6b/0x240 arch/x86/kvm/x86.c:10989 kvm_vcpu_destroy+0x29/0x90 arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 kvm_free_vcpus arch/x86/kvm/x86.c:11426 [inline] kvm_arch_destroy_vm+0x3ef/0x6b0 arch/x86/kvm/x86.c:11545 kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1189 [inline] kvm_put_kvm+0x751/0xe40 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1220 kvm_vcpu_release+0x53/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3489 __fput+0x3fc/0x870 fs/file_table.c:280 task_work_run+0x146/0x1c0 kernel/task_work.c:164 exit_task_work include/linux/task_work.h:32 [inline] do_exit+0x705/0x24f0 kernel/exit.c:832 do_group_exit+0x168/0x2d0 kernel/exit.c:929 get_signal+0x1740/0x2120 kernel/signal.c:2852 arch_do_signal_or_restart+0x9c/0x730 arch/x86/kernel/signal.c:868 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x191/0x220 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x2e/0x70 kernel/entry/common.c:300 do_syscall_64+0x53/0xd0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c8607e4a086f ("KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry") Reported-by: syzbot+f1d2136db9c80d4733e8@syzkaller.appspotmail.com Reviewed-by: Maxim Levitsky Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9f7604cbba41d..5d4d74dd76f52 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6613,9 +6613,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) * consistency check VM-Exit due to invalid guest state and bail. */ if (unlikely(vmx->emulation_required)) { - - /* We don't emulate invalid state of a nested guest */ - vmx->fail = is_guest_mode(vcpu); + vmx->fail = 0; vmx->exit_reason.full = EXIT_REASON_INVALID_STATE; vmx->exit_reason.failed_vmentry = 1; From cd0e615c49e5e5d69885af9ac3b4fa7bb3387f58 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:04 +0000 Subject: [PATCH 303/361] KVM: nVMX: Synthesize TRIPLE_FAULT for L2 if emulation is required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthesize a triple fault if L2 guest state is invalid at the time of VM-Enter, which can happen if L1 modifies SMRAM or if userspace stuffs guest state via ioctls(), e.g. KVM_SET_SREGS. KVM should never emulate invalid guest state, since from L1's perspective, it's architecturally impossible for L2 to have invalid state while L2 is running in hardware. E.g. attempts to set CR0 or CR4 to unsupported values will either VM-Exit or #GP. Modifying vCPU state via RSM+SMRAM and ioctl() are the only paths that can trigger this scenario, as nested VM-Enter correctly rejects any attempt to enter L2 with invalid state. RSM is a straightforward case as (a) KVM follows AMD's SMRAM layout and behavior, and (b) Intel's SDM states that loading reserved CR0/CR4 bits via RSM results in shutdown, i.e. there is precedent for KVM's behavior. Following AMD's SMRAM layout is important as AMD's layout saves/restores the descriptor cache information, including CS.RPL and SS.RPL, and also defines all the fields relevant to invalid guest state as read-only, i.e. so long as the vCPU had valid state before the SMI, which is guaranteed for L2, RSM will generate valid state unless SMRAM was modified. Intel's layout saves/restores only the selector, which means that scenarios where the selector and cached RPL don't match, e.g. conforming code segments, would yield invalid guest state. Intel CPUs fudge around this issued by stuffing SS.RPL and CS.RPL on RSM. Per Intel's SDM on the "Default Treatment of RSM", paraphrasing for brevity: IF internal storage indicates that the [CPU was post-VMXON] THEN enter VMX operation (root or non-root); restore VMX-critical state as defined in Section 34.14.1; set to their fixed values any bits in CR0 and CR4 whose values must be fixed in VMX operation [unless coming from an unrestricted guest]; IF RFLAGS.VM = 0 AND (in VMX root operation OR the “unrestricted guest” VM-execution control is 0) THEN CS.RPL := SS.DPL; SS.RPL := SS.DPL; FI; restore current VMCS pointer; FI; Note that Intel CPUs also overwrite the fixed CR0/CR4 bits, whereas KVM will sythesize TRIPLE_FAULT in this scenario. KVM's behavior is allowed as both Intel and AMD define CR0/CR4 SMRAM fields as read-only, i.e. the only way for CR0 and/or CR4 to have illegal values is if they were modified by the L1 SMM handler, and Intel's SDM "SMRAM State Save Map" section states "modifying these registers will result in unpredictable behavior". KVM's ioctl() behavior is less straightforward. Because KVM allows ioctls() to be executed in any order, rejecting an ioctl() if it would result in invalid L2 guest state is not an option as KVM cannot know if a future ioctl() would resolve the invalid state, e.g. KVM_SET_SREGS, or drop the vCPU out of L2, e.g. KVM_SET_NESTED_STATE. Ideally, KVM would reject KVM_RUN if L2 contained invalid guest state, but that carries the risk of a false positive, e.g. if RSM loaded invalid guest state and KVM exited to userspace. Setting a flag/request to detect such a scenario is undesirable because (a) it's extremely unlikely to add value to KVM as a whole, and (b) KVM would need to consider ioctl() interactions with such a flag, e.g. if userspace migrated the vCPU while the flag were set. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-3-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5d4d74dd76f52..5974a88c9d358 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5882,18 +5882,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vmx_flush_pml_buffer(vcpu); /* - * We should never reach this point with a pending nested VM-Enter, and - * more specifically emulation of L2 due to invalid guest state (see - * below) should never happen as that means we incorrectly allowed a - * nested VM-Enter with an invalid vmcs12. + * KVM should never reach this point with a pending nested VM-Enter. + * More specifically, short-circuiting VM-Entry to emulate L2 due to + * invalid guest state should never happen as that means KVM knowingly + * allowed a nested VM-Enter with an invalid vmcs12. More below. */ if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm)) return -EIO; - /* If guest state is invalid, start emulating */ - if (vmx->emulation_required) - return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu)) { /* * PML is never enabled when running L2, bail immediately if a @@ -5915,10 +5911,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) */ nested_mark_vmcs12_pages_dirty(vcpu); + /* + * Synthesize a triple fault if L2 state is invalid. In normal + * operation, nested VM-Enter rejects any attempt to enter L2 + * with invalid state. However, those checks are skipped if + * state is being stuffed via RSM or KVM_SET_NESTED_STATE. If + * L2 state is invalid, it means either L1 modified SMRAM state + * or userspace provided bad state. Synthesize TRIPLE_FAULT as + * doing so is architecturally allowed in the RSM case, and is + * the least awful solution for the userspace case without + * risking false positives. + */ + if (vmx->emulation_required) { + nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); + return 1; + } + if (nested_vmx_reflect_vmexit(vcpu)) return 1; } + /* If guest state is invalid, start emulating. L2 is handled above. */ + if (vmx->emulation_required) + return handle_invalid_guest_state(vcpu); + if (exit_reason.failed_vmentry) { dump_vmcs(vcpu); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; From 0ff29701ffad9a5d5a24344d8b09f3af7b96ffda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:05 +0000 Subject: [PATCH 304/361] KVM: VMX: Fix stale docs for kvm-intel.emulate_invalid_guest_state Update the documentation for kvm-intel's emulate_invalid_guest_state to rectify the description of KVM's default behavior, and to document that the behavior and thus parameter only applies to L1. Fixes: a27685c33acc ("KVM: VMX: Emulate invalid guest state by default") Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-4-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9725c546a0d46..fc34332c8d9a6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2413,8 +2413,12 @@ Default is 1 (enabled) kvm-intel.emulate_invalid_guest_state= - [KVM,Intel] Enable emulation of invalid guest states - Default is 0 (disabled) + [KVM,Intel] Disable emulation of invalid guest state. + Ignored if kvm-intel.enable_unrestricted_guest=1, as + guest state is never invalid for unrestricted guests. + This param doesn't apply to nested guests (L2), as KVM + never emulates invalid L2 guest state. + Default is 1 (enabled) kvm-intel.flexpriority= [KVM,Intel] Disable FlexPriority feature (TPR shadow). From ab1ef34416a65ba11f66ae6435fcf0251cb46fd4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:06 +0000 Subject: [PATCH 305/361] KVM: selftests: Add test to verify TRIPLE_FAULT on invalid L2 guest state Add a selftest to attempt to enter L2 with invalid guests state by exiting to userspace via I/O from L2, and then using KVM_SET_SREGS to set invalid guest state (marking TR unusable is arbitrary chosen for its relative simplicity). This is a regression test for a bug introduced by commit c8607e4a086f ("KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry"), which incorrectly set vmx->fail=true when L2 had invalid guest state and ultimately triggered a WARN due to nested_vmx_vmexit() seeing vmx->fail==true while attempting to synthesize a nested VM-Exit. The is also a functional test to verify that KVM sythesizes TRIPLE_FAULT for L2, which is somewhat arbitrary behavior, instead of emulating L2. KVM should never emulate L2 due to invalid guest state, as it's architecturally impossible for L1 to run an L2 guest with invalid state as nested VM-Enter should always fail, i.e. L1 needs to do the emulation. Stuffing state via KVM ioctl() is a non-architctural, out-of-band case, hence the TRIPLE_FAULT being rather arbitrary. Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-5-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../x86_64/vmx_invalid_nested_guest_state.c | 105 ++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 00814c0f87a67..3cb5ac5da0875 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -35,6 +35,7 @@ /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_invalid_nested_guest_state /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f307c9f619815..17342b575e855 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -64,6 +64,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c new file mode 100644 index 0000000000000..489fbed4ca6fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define VCPU_ID 0 +#define ARBITRARY_IO_PORT 0x2000 + +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* + * Generate an exit to L0 userspace, i.e. main(), via I/O to an + * arbitrary port. + */ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * L2 must be run without unrestricted guest, verify that the selftests + * library hasn't enabled it. Because KVM selftests jump directly to + * 64-bit mode, unrestricted guest support isn't required. + */ + GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) || + !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST)); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 should triple fault after main() stuffs invalid guest state. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct kvm_sregs sregs; + struct kvm_run *run; + struct ucall uc; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + vcpu_run(vm, VCPU_ID); + + run = vcpu_state(vm, VCPU_ID); + + /* + * The first exit to L0 userspace should be an I/O access from L2. + * Running L1 should launch L2 without triggering an exit to userspace. + */ + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Expected KVM_EXIT_IO, got: %u (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + + /* + * Stuff invalid guest state for L2 by making TR unusuable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + memset(&sregs, 0, sizeof(sregs)); + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.tr.unusable = 1; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} From 484730e5862f6b872dca13840bed40fd7c60fa26 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 8 Dec 2021 11:06:52 +0100 Subject: [PATCH 306/361] parisc: Clear stale IIR value on instruction access rights trap When a trap 7 (Instruction access rights) occurs, this means the CPU couldn't execute an instruction due to missing execute permissions on the memory region. In this case it seems the CPU didn't even fetched the instruction from memory and thus did not store it in the cr19 (IIR) register before calling the trap handler. So, the trap handler will find some random old stale value in cr19. This patch simply overwrites the stale IIR value with a constant magic "bad food" value (0xbaadf00d), in the hope people don't start to try to understand the various random IIR values in trap 7 dumps. Noticed-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b11fb26ce2998..892b7fc8f3c45 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -730,6 +730,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } mmap_read_unlock(current->mm); } + /* CPU could not fetch instruction, so clear stale IIR value. */ + regs->iir = 0xbaadf00d; fallthrough; case 27: /* Data memory protection ID trap */ From 8f905c0e7354ef261360fb7535ea079b1082c105 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2021 06:33:30 -0800 Subject: [PATCH 307/361] inet: fully convert sk->sk_rx_dst to RCU rules syzbot reported various issues around early demux, one being included in this changelog [1] sk->sk_rx_dst is using RCU protection without clearly documenting it. And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv() are not following standard RCU rules. [a] dst_release(dst); [b] sk->sk_rx_dst = NULL; They look wrong because a delete operation of RCU protected pointer is supposed to clear the pointer before the call_rcu()/synchronize_rcu() guarding actual memory freeing. In some cases indeed, dst could be freed before [b] is done. We could cheat by clearing sk_rx_dst before calling dst_release(), but this seems the right time to stick to standard RCU annotations and debugging facilities. [1] BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline] BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204 CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 dst_check include/net/dst.h:470 [inline] tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629 RIP: 0033:0x7f5e972bfd57 Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73 RSP: 002b:00007fff8a413210 EFLAGS: 00000283 RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45 RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45 RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9 R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0 R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019 Allocated by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340 ip_route_input_rcu net/ipv4/route.c:2470 [inline] ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Freed by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749 slab_free mm/slub.c:3513 [inline] kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2506 [inline] rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:2985 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065 dst_release net/core/dst.c:177 [inline] dst_release+0x79/0xe0 net/core/dst.c:167 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2768 release_sock+0x54/0x1b0 net/core/sock.c:3300 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x289/0x3c0 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write+0x429/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x1ee/0x250 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807f1cb700 which belongs to the cache ip_dst_cache of size 176 The buggy address is located 58 bytes inside of 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0) The buggy address belongs to the page: page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191 alloc_slab_page mm/slub.c:1793 [inline] allocate_slab mm/slub.c:1930 [inline] new_slab+0x32d/0x4a0 mm/slub.c:1993 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109 slab_alloc_node mm/slub.c:3200 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 __mkroute_output net/ipv4/route.c:2564 [inline] ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619 __ip_route_output_key include/net/route.h:126 [inline] ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850 ip_route_output_key include/net/route.h:142 [inline] geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809 geneve_xmit_skb drivers/net/geneve.c:899 [inline] geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082 __netdev_start_xmit include/linux/netdevice.h:4994 [inline] netdev_start_xmit include/linux/netdevice.h:5008 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606 __dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline] mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 Memory state around the buggy address: ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv4/udp.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 +++++++---- net/ipv6/udp.c | 4 ++-- 8 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index bea21ff70e74d..d47e9658da285 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -431,7 +431,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; int sk_rx_dst_ifindex; u32 sk_rx_dst_cookie; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0189e3cd4a7df..6b59565004361 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); - dst_release(sk->sk_rx_dst); + dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bbb3d39c69afc..2bb28bfd83bf6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3012,8 +3012,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(sk->sk_rx_dst); - sk->sk_rx_dst = NULL; + dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 246ab7b5e857e..0ce46849ec3d4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5787,7 +5787,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) trace_tcp_probe(sk, skb); tcp_mstamp_refresh(tp); - if (unlikely(!sk->sk_rx_dst)) + if (unlikely(!rcu_access_pointer(sk->sk_rx_dst))) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13d868c432845..084df223b5dff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1701,7 +1701,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); @@ -1709,8 +1712,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_rx_dst_ifindex != skb->skb_iif || !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, dst, 0)) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } tcp_rcv_established(sk, skb); @@ -1786,7 +1789,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); @@ -2201,7 +2204,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 23b05e28490b0..15c6b450b8dba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2250,7 +2250,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg(&sk->sk_rx_dst, dst); + old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); dst_release(old); return old != dst; } @@ -2440,7 +2440,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); @@ -2599,7 +2599,7 @@ int udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 551fce49841d7..680e6481b9672 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -107,7 +107,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } @@ -1505,7 +1505,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); @@ -1513,8 +1516,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } @@ -1874,7 +1877,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e43b31d25fb61..a2caca6ccf114 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -956,7 +956,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1070,7 +1070,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); From a9725e1d3962ad00288c4ae6d9b518afc51b2adc Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:46:08 -0500 Subject: [PATCH 308/361] docs: networking: replace skb_hwtstamp_tx with skb_tstamp_tx Tiny doc fix. The hardware transmit function was called skb_tstamp_tx from its introduction in commit ac45f602ee3d ("net: infrastructure for hardware time stamping") in the same series as this documentation. Fixes: cb9eff097831 ("net: new user space API for time stamping of incoming and outgoing packets") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220144608.2783526-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 80b13353254a0..f5809206eb93d 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -582,8 +582,8 @@ Time stamps for outgoing packets are to be generated as follows: and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set). - As soon as the driver has sent the packet and/or obtained a hardware time stamp for it, it passes the time stamp back by - calling skb_hwtstamp_tx() with the original skb, the raw - hardware time stamp. skb_hwtstamp_tx() clones the original skb and + calling skb_tstamp_tx() with the original skb, the raw + hardware time stamp. skb_tstamp_tx() clones the original skb and adds the timestamps, therefore the original skb has to be freed now. If obtaining the hardware time stamp somehow fails, then the driver should not fall back to software time stamping. The rationale is that From 7e5cced9ca84df52d874aca6b632f930b3dc5bc6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:49:01 -0500 Subject: [PATCH 309/361] net: accept UFOv6 packages in virtio_net_hdr_to_skb Skb with skb->protocol 0 at the time of virtio_net_hdr_to_skb may have a protocol inferred from virtio_net_hdr with virtio_net_hdr_set_proto. Unlike TCP, UDP does not have separate types for IPv4 and IPv6. Type VIRTIO_NET_HDR_GSO_UDP is guessed to be IPv4/UDP. As of the below commit, UFOv6 packets are dropped due to not matching the protocol as obtained from dev_parse_header_protocol. Invert the test to take that L2 protocol field as starting point and pass both UFOv4 and UFOv6 for VIRTIO_NET_HDR_GSO_UDP. Fixes: 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct") Link: https://lore.kernel.org/netdev/CABcq3pG9GRCYqFDBAJ48H1vpnnX=41u+MhQnayF1ztLH4WX0Fw@mail.gmail.com/ Reported-by: Andrew Melnichenko Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220144901.2784030-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 04e87f4b9417c..22dd48c825600 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,6 +7,21 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { @@ -88,9 +103,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, From 1ed1d592113959f00cc552c3b9f47ca2d157768f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:50:27 -0500 Subject: [PATCH 310/361] net: skip virtio_net_hdr_set_proto if protocol already set virtio_net_hdr_set_proto infers skb->protocol from the virtio_net_hdr gso_type, to avoid packets getting dropped for lack of a proto type. Its protocol choice is a guess, especially in the case of UFO, where the single VIRTIO_NET_HDR_GSO_UDP label covers both UFOv4 and UFOv6. Skip this best effort if the field is already initialized. Whether explicitly from userspace, or implicitly based on an earlier call to dev_parse_header_protocol (which is more robust, but was introduced after this patch). Fixes: 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220145027.2784293-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 22dd48c825600..a960de68ac69e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -25,6 +25,9 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: From 1f06f7d97f741667bab0f459a4f940b21cab1549 Mon Sep 17 00:00:00 2001 From: Jeroen de Borst Date: Mon, 20 Dec 2021 11:27:46 -0800 Subject: [PATCH 311/361] gve: Correct order of processing device options The legacy raw addressing device option was processed before the new RDA queue format option. This caused the supported features mask, which is provided only on the RDA queue format option, not to be set. This disabled jumbo-frame support when using raw adressing. Fixes: 255489f5b33c ("gve: Add a jumbo-frame device option") Signed-off-by: Jeroen de Borst Link: https://lore.kernel.org/r/20211220192746.2900594-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 83ae56c310d3b..326b56b49216a 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -738,10 +738,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) * is not set to GqiRda, choose the queue format in a priority order: * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default. */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) { - dev_info(&priv->pdev->dev, - "Driver is running with GQI RDA queue format.\n"); - } else if (dev_op_dqo_rda) { + if (dev_op_dqo_rda) { priv->queue_format = GVE_DQO_RDA_FORMAT; dev_info(&priv->pdev->dev, "Driver is running with DQO RDA queue format.\n"); @@ -753,6 +750,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) "Driver is running with GQI RDA queue format.\n"); supported_features_mask = be32_to_cpu(dev_op_gqi_rda->supported_features_mask); + } else if (priv->queue_format == GVE_GQI_RDA_FORMAT) { + dev_info(&priv->pdev->dev, + "Driver is running with GQI RDA queue format.\n"); } else { priv->queue_format = GVE_GQI_QPL_FORMAT; if (dev_op_gqi_qpl) From ac8c58f5b535d6272324e2b8b4a0454781c9147e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Dec 2021 12:18:44 -0800 Subject: [PATCH 312/361] igb: fix deadlock caused by taking RTNL in RPM resume path Recent net core changes caused an issue with few Intel drivers (reportedly igb), where taking RTNL in RPM resume path results in a deadlock. See [0] for a bug report. I don't think the core changes are wrong, but taking RTNL in RPM resume path isn't needed. The Intel drivers are the only ones doing this. See [1] for a discussion on the issue. Following patch changes the RPM resume path to not take RTNL. [0] https://bugzilla.kernel.org/show_bug.cgi?id=215129 [1] https://lore.kernel.org/netdev/20211125074949.5f897431@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/t/ Fixes: bd869245a3dc ("net: core: try to runtime-resume detached device in __dev_open") Fixes: f32a21376573 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops") Tested-by: Martin Stolpe Signed-off-by: Heiner Kallweit Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20211220201844.2714498-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b597b8bfb9103..446894dde1820 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9254,7 +9254,7 @@ static int __maybe_unused igb_suspend(struct device *dev) return __igb_shutdown(to_pci_dev(dev), NULL, 0); } -static int __maybe_unused igb_resume(struct device *dev) +static int __maybe_unused __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -9297,17 +9297,24 @@ static int __maybe_unused igb_resume(struct device *dev) wr32(E1000_WUS, ~0); - rtnl_lock(); + if (!rpm) + rtnl_lock(); if (!err && netif_running(netdev)) err = __igb_open(netdev, true); if (!err) netif_device_attach(netdev); - rtnl_unlock(); + if (!rpm) + rtnl_unlock(); return err; } +static int __maybe_unused igb_resume(struct device *dev) +{ + return __igb_resume(dev, false); +} + static int __maybe_unused igb_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); @@ -9326,7 +9333,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev) static int __maybe_unused igb_runtime_resume(struct device *dev) { - return igb_resume(dev); + return __igb_resume(dev, true); } static void igb_shutdown(struct pci_dev *pdev) @@ -9442,7 +9449,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. + * resembles the first-half of the __igb_resume routine. **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { @@ -9482,7 +9489,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * second-half of the __igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { From ff31ee0a0f471776f67be5e5275c18d17736fc6b Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Wed, 15 Dec 2021 15:17:26 +0100 Subject: [PATCH 313/361] mmc: mmci: stm32: clear DLYB_CR after sending tuning command During test campaign, and especially after several unbind/bind sequences, it has been seen that the SD-card on SDMMC1 thread could freeze. The freeze always appear on a CMD23 following a CMD19. Checking SDMMC internal registers shows that the tuning command (CMD19) has failed. The freeze is then due to the delay block involved in the tuning sequence. To correct this, clear the delay block register DLYB_CR register after the tuning commands. Signed-off-by: Christophe Kerello Signed-off-by: Yann Gautier Reviewed-by: Linus Walleij Fixes: 1103f807a3b9 ("mmc: mmci_sdmmc: Add execute tuning with delay block") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215141727.4901-4-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index fdaa11f92fe6f..a75d3dd34d18c 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -441,6 +441,8 @@ static int sdmmc_dlyb_phase_tuning(struct mmci_host *host, u32 opcode) return -EINVAL; } + writel_relaxed(0, dlyb->base + DLYB_CR); + phase = end_of_len - max_len / 2; sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false); From ffb76a86f8096a8206be03b14adda6092e18e275 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 21 Dec 2021 15:00:34 +0800 Subject: [PATCH 314/361] ipmi: Fix UAF when uninstall ipmi_si and ipmi_msghandler module Hi, When testing install and uninstall of ipmi_si.ko and ipmi_msghandler.ko, the system crashed. The log as follows: [ 141.087026] BUG: unable to handle kernel paging request at ffffffffc09b3a5a [ 141.087241] PGD 8fe4c0d067 P4D 8fe4c0d067 PUD 8fe4c0f067 PMD 103ad89067 PTE 0 [ 141.087464] Oops: 0010 [#1] SMP NOPTI [ 141.087580] CPU: 67 PID: 668 Comm: kworker/67:1 Kdump: loaded Not tainted 4.18.0.x86_64 #47 [ 141.088009] Workqueue: events 0xffffffffc09b3a40 [ 141.088009] RIP: 0010:0xffffffffc09b3a5a [ 141.088009] Code: Bad RIP value. [ 141.088009] RSP: 0018:ffffb9094e2c3e88 EFLAGS: 00010246 [ 141.088009] RAX: 0000000000000000 RBX: ffff9abfdb1f04a0 RCX: 0000000000000000 [ 141.088009] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 141.088009] RBP: 0000000000000000 R08: ffff9abfffee3cb8 R09: 00000000000002e1 [ 141.088009] R10: ffffb9094cb73d90 R11: 00000000000f4240 R12: ffff9abfffee8700 [ 141.088009] R13: 0000000000000000 R14: ffff9abfdb1f04a0 R15: ffff9abfdb1f04a8 [ 141.088009] FS: 0000000000000000(0000) GS:ffff9abfffec0000(0000) knlGS:0000000000000000 [ 141.088009] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 141.088009] CR2: ffffffffc09b3a30 CR3: 0000008fe4c0a001 CR4: 00000000007606e0 [ 141.088009] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 141.088009] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 141.088009] PKRU: 55555554 [ 141.088009] Call Trace: [ 141.088009] ? process_one_work+0x195/0x390 [ 141.088009] ? worker_thread+0x30/0x390 [ 141.088009] ? process_one_work+0x390/0x390 [ 141.088009] ? kthread+0x10d/0x130 [ 141.088009] ? kthread_flush_work_fn+0x10/0x10 [ 141.088009] ? ret_from_fork+0x35/0x40] BUG: unable to handle kernel paging request at ffffffffc0b28a5a [ 200.223240] PGD 97fe00d067 P4D 97fe00d067 PUD 97fe00f067 PMD a580cbf067 PTE 0 [ 200.223464] Oops: 0010 [#1] SMP NOPTI [ 200.223579] CPU: 63 PID: 664 Comm: kworker/63:1 Kdump: loaded Not tainted 4.18.0.x86_64 #46 [ 200.224008] Workqueue: events 0xffffffffc0b28a40 [ 200.224008] RIP: 0010:0xffffffffc0b28a5a [ 200.224008] Code: Bad RIP value. [ 200.224008] RSP: 0018:ffffbf3c8e2a3e88 EFLAGS: 00010246 [ 200.224008] RAX: 0000000000000000 RBX: ffffa0799ad6bca0 RCX: 0000000000000000 [ 200.224008] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 200.224008] RBP: 0000000000000000 R08: ffff9fe43fde3cb8 R09: 00000000000000d5 [ 200.224008] R10: ffffbf3c8cb53d90 R11: 00000000000f4240 R12: ffff9fe43fde8700 [ 200.224008] R13: 0000000000000000 R14: ffffa0799ad6bca0 R15: ffffa0799ad6bca8 [ 200.224008] FS: 0000000000000000(0000) GS:ffff9fe43fdc0000(0000) knlGS:0000000000000000 [ 200.224008] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.224008] CR2: ffffffffc0b28a30 CR3: 00000097fe00a002 CR4: 00000000007606e0 [ 200.224008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 200.224008] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 200.224008] PKRU: 55555554 [ 200.224008] Call Trace: [ 200.224008] ? process_one_work+0x195/0x390 [ 200.224008] ? worker_thread+0x30/0x390 [ 200.224008] ? process_one_work+0x390/0x390 [ 200.224008] ? kthread+0x10d/0x130 [ 200.224008] ? kthread_flush_work_fn+0x10/0x10 [ 200.224008] ? ret_from_fork+0x35/0x40 [ 200.224008] kernel fault(0x1) notification starting on CPU 63 [ 200.224008] kernel fault(0x1) notification finished on CPU 63 [ 200.224008] CR2: ffffffffc0b28a5a [ 200.224008] ---[ end trace c82a412d93f57412 ]--- The reason is as follows: T1: rmmod ipmi_si. ->ipmi_unregister_smi() -> ipmi_bmc_unregister() -> __ipmi_bmc_unregister() -> kref_put(&bmc->usecount, cleanup_bmc_device); -> schedule_work(&bmc->remove_work); T2: rmmod ipmi_msghandler. ipmi_msghander module uninstalled, and the module space will be freed. T3: bmc->remove_work doing cleanup the bmc resource. -> cleanup_bmc_work() -> platform_device_unregister(&bmc->pdev); -> platform_device_del(pdev); -> device_del(&pdev->dev); -> kobject_uevent(&dev->kobj, KOBJ_REMOVE); -> kobject_uevent_env() -> dev_uevent() -> if (dev->type && dev->type->name) 'dev->type'(bmc_device_type) pointer space has freed when uninstall ipmi_msghander module, 'dev->type->name' cause the system crash. drivers/char/ipmi/ipmi_msghandler.c: 2820 static const struct device_type bmc_device_type = { 2821 .groups = bmc_dev_attr_groups, 2822 }; Steps to reproduce: Add a time delay in cleanup_bmc_work() function, and uninstall ipmi_si and ipmi_msghandler module. 2910 static void cleanup_bmc_work(struct work_struct *work) 2911 { 2912 struct bmc_device *bmc = container_of(work, struct bmc_device, 2913 remove_work); 2914 int id = bmc->pdev.id; /* Unregister overwrites id */ 2915 2916 msleep(3000); <--- 2917 platform_device_unregister(&bmc->pdev); 2918 ida_simple_remove(&ipmi_bmc_ida, id); 2919 } Use 'remove_work_wq' instead of 'system_wq' to solve this issues. Fixes: b2cfd8ab4add ("ipmi: Rework device id and guid handling to catch changing BMCs") Signed-off-by: Wu Bo Message-Id: <1640070034-56671-1-git-send-email-wubo40@huawei.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 266c7bc58ddae..c59265146e9c8 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3031,7 +3031,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ - schedule_work(&bmc->remove_work); + queue_work(remove_work_wq, &bmc->remove_work); } /* From 3e4d9a485029aa9e172dab5420abe775fd86f8e8 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 20 Dec 2021 14:06:56 +0100 Subject: [PATCH 315/361] gpio: virtio: remove timeout The driver imposes an arbitrary one second timeout on virtio requests, but the specification doesn't prevent the virtio device from taking longer to process requests, so remove this timeout to support all systems and device implementations. Fixes: 3a29355a22c0275fe86 ("gpio: Add virtio-gpio driver") Signed-off-by: Vincent Whitchurch Acked-by: Michael S. Tsirkin Acked-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 84f96b78f32af..9f4941bc57604 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -100,11 +100,7 @@ static int _virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio, virtqueue_kick(vgpio->request_vq); mutex_unlock(&vgpio->lock); - if (!wait_for_completion_timeout(&line->completion, HZ)) { - dev_err(dev, "GPIO operation timed out\n"); - ret = -ETIMEDOUT; - goto out; - } + wait_for_completion(&line->completion); if (unlikely(res->status != VIRTIO_GPIO_STATUS_OK)) { dev_err(dev, "GPIO request failed: %d\n", gpio); From fdba608f15e2427419997b0898750a49a735afcb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Dec 2021 10:37:00 -0500 Subject: [PATCH 316/361] KVM: VMX: Wake vCPU when delivering posted IRQ even if vCPU == this vCPU Drop a check that guards triggering a posted interrupt on the currently running vCPU, and more importantly guards waking the target vCPU if triggering a posted interrupt fails because the vCPU isn't IN_GUEST_MODE. If a vIRQ is delivered from asynchronous context, the target vCPU can be the currently running vCPU and can also be blocking, in which case skipping kvm_vcpu_wake_up() is effectively dropping what is supposed to be a wake event for the vCPU. The "do nothing" logic when "vcpu == running_vcpu" mostly works only because the majority of calls to ->deliver_posted_interrupt(), especially when using posted interrupts, come from synchronous KVM context. But if a device is exposed to the guest using vfio-pci passthrough, the VFIO IRQ and vCPU are bound to the same pCPU, and the IRQ is _not_ configured to use posted interrupts, wake events from the device will be delivered to KVM from IRQ context, e.g. vfio_msihandler() | |-> eventfd_signal() | |-> ... | |-> irqfd_wakeup() | |->kvm_arch_set_irq_inatomic() | |-> kvm_irq_delivery_to_apic_fast() | |-> kvm_apic_set_irq() This also aligns the non-nested and nested usage of triggering posted interrupts, and will allow for additional cleanups. Fixes: 379a3c8ee444 ("KVM: VMX: Optimize posted-interrupt delivery for timer fastpath") Cc: stable@vger.kernel.org Reported-by: Longpeng (Mike) Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Message-Id: <20211208015236.1616697-18-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5974a88c9d358..0dbf94eb954fd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3964,8 +3964,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; - if (vcpu != kvm_get_running_vcpu() && - !kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); return 0; From 804034c4ffc502795cea9b3867acb2ec7fad99ba Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 10 Dec 2021 07:07:53 +0000 Subject: [PATCH 317/361] platform/mellanox: mlxbf-pmc: Fix an IS_ERR() vs NULL bug in mlxbf_pmc_map_counters The devm_ioremap() function returns NULL on error, it doesn't return error pointers. Also according to doc of device_property_read_u64_array, values in info array are properties of device or NULL. Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211210070753.10761-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 04bc3b50aa7a4..65b4a819f1bdf 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1374,8 +1374,8 @@ static int mlxbf_pmc_map_counters(struct device *dev) pmc->block[i].counters = info[2]; pmc->block[i].type = info[3]; - if (IS_ERR(pmc->block[i].mmio_base)) - return PTR_ERR(pmc->block[i].mmio_base); + if (!pmc->block[i].mmio_base) + return -ENOMEM; ret = mlxbf_pmc_create_groups(dev, i); if (ret) From 09fc14061f3ed28899c23b8714c066946fdbd43e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Dec 2021 08:35:29 -0600 Subject: [PATCH 318/361] platform/x86: amd-pmc: only use callbacks for suspend This driver is intended to be used exclusively for suspend to idle so callbacks to send OS_HINT during hibernate and S5 will set OS_HINT at the wrong time leading to an undefined behavior. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20211210143529.10594-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 841c44cd64c2c..230593ae5d6de 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -508,7 +508,8 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) } static const struct dev_pm_ops amd_pmc_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume) + .suspend_noirq = amd_pmc_suspend, + .resume_noirq = amd_pmc_resume, }; static const struct pci_device_id pmc_pci_ids[] = { From eb66fb03a727cde0ab9b1a3858de55c26f3007da Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Tue, 14 Dec 2021 04:18:36 -0800 Subject: [PATCH 319/361] platform/x86: apple-gmux: use resource_size() with res This should be (res->end - res->start + 1) here actually, use resource_size() derectly. Signed-off-by: Wang Qing Link: https://lore.kernel.org/r/1639484316-75873-1-git-send-email-wangqing@vivo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/apple-gmux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 9aae45a452002..57553f9b4d1dc 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) } gmux_data->iostart = res->start; - gmux_data->iolen = res->end - res->start; + gmux_data->iolen = resource_size(res); if (gmux_data->iolen < GMUX_MIN_IO_LEN) { pr_err("gmux I/O region too small (%lu < %u)\n", From 8f66fce0f46560b9e910787ff7ad0974441c4f9c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:21:22 -0500 Subject: [PATCH 320/361] parisc: Correct completer in lws start The completer in the "or,ev %r1,%r30,%r30" instruction is reversed, so we are not clipping the LWS number when we are called from a 32-bit process (W=0). We need to nulify the following depdi instruction when the least-significant bit of %r30 is 1. If the %r20 register is not clipped, a user process could perform a LWS call that would branch to an undefined location in the kernel and potentially crash the machine. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index d2497b339d139..65c88ca7a7acd 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -472,7 +472,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 From d3a5a68cff47f6eead84504c3c28376b85053242 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:33:16 -0500 Subject: [PATCH 321/361] parisc: Fix mask used to select futex spinlock The address bits used to select the futex spinlock need to match those used in the LWS code in syscall.S. The mask 0x3f8 only selects 7 bits. It should select 8 bits. This change fixes the glibc nptl/tst-cond24 and nptl/tst-cond25 tests. Signed-off-by: John David Anglin Fixes: 53a42b6324b8 ("parisc: Switch to more fine grained lws locks") Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 70cf8f0a7617b..9cd4dd6e63ad9 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -14,7 +14,7 @@ static inline void _futex_spin_lock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; preempt_disable(); arch_spin_lock(s); @@ -24,7 +24,7 @@ static inline void _futex_spin_unlock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; arch_spin_unlock(s); preempt_enable(); From cb8747b7d2a9e3d687a19a007575071d4b71cd05 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 15 Nov 2021 14:46:47 +0100 Subject: [PATCH 322/361] uapi: Fix undefined __always_inline on non-glibc systems This macro is defined by glibc itself, which makes the issue go unnoticed on those systems. On non-glibc systems it causes build failures on several utilities and libraries, like bpftool and objtool. Fixes: 1d509f2a6ebc ("x86/insn: Support big endian cross-compiles") Fixes: 2d7ce0e8a704 ("tools/virtio: more stubs") Fixes: 3fb321fde22d ("selftests/net: ipv6 flowlabel") Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection") Fixes: 9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE") Fixes: a4b2061242ec ("tools include uapi: Grab a copy of linux/in.h") Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality") Fixes: c0dd967818a2 ("tools, include: Grab a copy of linux/erspan.h") Fixes: c4b6014e8bb0 ("tools: Add copy of perf_event.h to tools/include/linux/") Signed-off-by: Ismael Luceno Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211115134647.1921-1-ismael@iodev.co.uk Cc: Martin Schwidefsky Cc: Vasily Gorbik --- include/uapi/linux/byteorder/big_endian.h | 1 + include/uapi/linux/byteorder/little_endian.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h index 2199adc6a6c20..80aa5c41a7636 100644 --- a/include/uapi/linux/byteorder/big_endian.h +++ b/include/uapi/linux/byteorder/big_endian.h @@ -9,6 +9,7 @@ #define __BIG_ENDIAN_BITFIELD #endif +#include #include #include diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h index 601c904fd5cd9..cd98982e7523e 100644 --- a/include/uapi/linux/byteorder/little_endian.h +++ b/include/uapi/linux/byteorder/little_endian.h @@ -9,6 +9,7 @@ #define __LITTLE_ENDIAN_BITFIELD #endif +#include #include #include From dcce50e6cc4d86a63dc0a9a6ee7d4f948ccd53a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 8 Nov 2021 14:35:59 -0800 Subject: [PATCH 323/361] compiler.h: Fix annotation macro misplacement with Clang When building with Clang and CONFIG_TRACE_BRANCH_PROFILING, there are a lot of unreachable warnings, like: arch/x86/kernel/traps.o: warning: objtool: handle_xfd_event()+0x134: unreachable instruction Without an input to the inline asm, 'volatile' is ignored for some reason and Clang feels free to move the reachable() annotation away from its intended location. Fix that by re-adding the counter value to the inputs. Fixes: f1069a8756b9 ("compiler.h: Avoid using inline asm operand modifiers") Fixes: c199f64ff93c ("instrumentation.h: Avoid using inline asm operand modifiers") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/0417e96909b97a406323409210de7bf13df0b170.1636410380.git.jpoimboe@redhat.com Cc: Peter Zijlstra Cc: x86@kernel.org Cc: Vasily Gorbik Cc: Miroslav Benes --- include/linux/compiler.h | 4 ++-- include/linux/instrumentation.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d5af56337bdb..429dcebe2b992 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_reachable() __annotate_reachable(__COUNTER__) @@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index fa2cd8c63dcc9..24359b4a96053 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -11,7 +11,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -50,7 +50,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_end\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else From 1c15b05baea71a5ff98235783e3e4ad227760876 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 21 Dec 2021 12:13:45 +0100 Subject: [PATCH 324/361] bonding: fix ad_actor_system option setting to default When 802.3ad bond mode is configured the ad_actor_system option is set to "00:00:00:00:00:00". But when trying to set the all-zeroes MAC as actors' system address it was failing with EINVAL. An all-zeroes ethernet address is valid, only multicast addresses are not valid values. Fixes: 171a42c38c6e ("bonding: add netlink support for sys prio, actor sys mac, and port key") Signed-off-by: Fernando Fernandez Mancera Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20211221111345.2462-1-ffmancera@riseup.net Signed-off-by: Jakub Kicinski --- Documentation/networking/bonding.rst | 11 ++++++----- drivers/net/bonding/bond_options.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 31cfd7d674a6c..c0a789b008063 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -196,11 +196,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a8fde3bc458f6..b93337b5a7211 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1526,7 +1526,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, mac = (u8 *)&newval->value; } - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) goto err; netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); From aacb2016063dfa6da9378d76734cd9dc1e977619 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Dec 2021 11:40:44 +0900 Subject: [PATCH 325/361] parisc: remove ARCH_DEFCONFIG Commit 2a86f6612164 ("kbuild: use KBUILD_DEFCONFIG as the fallback for DEFCONFIG_LIST") removed ARCH_DEFCONFIG because it does not make much sense. In the same development cycle, Commit ededa081ed20 ("parisc: Fix defconfig selection") added ARCH_DEFCONFIG for parisc. Please use KBUILD_DEFCONFIG in arch/*/Makefile for defconfig selection. Signed-off-by: Masahiro Yamada Acked-by: Helge Deller Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b2188da09c732..011dc32fdb4d5 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -85,11 +85,6 @@ config MMU config STACK_GROWSUP def_bool y -config ARCH_DEFCONFIG - string - default "arch/parisc/configs/generic-32bit_defconfig" if !64BIT - default "arch/parisc/configs/generic-64bit_defconfig" if 64BIT - config GENERIC_LOCKBREAK bool default y From db6d6afe382de5a65d6ccf51253ab48b8e8336c3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:12:07 +0800 Subject: [PATCH 326/361] fjes: Check for error irq I find that platform_get_irq() will not always succeed. It will return error irq in case of the failure. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: 658d439b2292 ("fjes: Introduce FUJITSU Extended Socket Network Device driver") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b06c17ac8d4ee..ebd287039a546 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1262,6 +1262,11 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.start = res->start; hw->hw_res.size = resource_size(res); hw->hw_res.irq = platform_get_irq(plat_dev, 0); + if (hw->hw_res.irq < 0) { + err = hw->hw_res.irq; + goto err_free_control_wq; + } + err = fjes_hw_init(&adapter->hw); if (err) goto err_free_control_wq; From cb93b3e11d405f20a405a07482d01147ef4934a3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:41:12 +0800 Subject: [PATCH 327/361] drivers: net: smc911x: Check for error irq Because platform_get_irq() could fail and return error irq. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: ae150435b59e ("smsc: Move the SMC (SMSC) drivers") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 89381f7969855..dd6f69ced4ee3 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -2072,6 +2072,11 @@ static int smc911x_drv_probe(struct platform_device *pdev) ndev->dma = (unsigned char)-1; ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) { + ret = ndev->irq; + goto release_both; + } + lp = netdev_priv(ndev); lp->netdev = ndev; #ifdef SMC_DYNAMIC_BUS_CONFIG From 99d7fbb5cedf598f67e8be106d6c7b8d91366aef Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:59:44 +0800 Subject: [PATCH 328/361] net: ks8851: Check for error irq Because platform_get_irq() could fail and return error irq. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_par.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 2e25798c610ee..7f49042484bdc 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -321,6 +321,8 @@ static int ks8851_probe_par(struct platform_device *pdev) return ret; netdev->irq = platform_get_irq(pdev, 0); + if (netdev->irq < 0) + return netdev->irq; return ks8851_probe_common(netdev, dev, msg_enable); } From d7f55471db2719629f773c2d6b5742a69595bfd3 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 17 Dec 2021 10:07:54 +0800 Subject: [PATCH 329/361] memblock: fix memblock_phys_alloc() section mismatch error Fix modpost Section mismatch error in memblock_phys_alloc() [...] WARNING: modpost: vmlinux.o(.text.unlikely+0x1dcc): Section mismatch in reference from the function memblock_phys_alloc() to the function .init.text:memblock_phys_alloc_range() The function memblock_phys_alloc() references the function __init memblock_phys_alloc_range(). This is often because memblock_phys_alloc lacks a __init annotation or the annotation of memblock_phys_alloc_range is wrong. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. [...] memblock_phys_alloc() is a one-line wrapper, make it __always_inline to avoid these section mismatches. Reported-by: k2ci Suggested-by: Mike Rapoport Signed-off-by: Jackie Liu [rppt: slightly massaged changelog ] Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20211217020754.2874872-1-liu.yun@linux.dev --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8adcf1fa8096f..9dc7cb239d21c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -405,8 +405,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); From b6fd77472dea76b7a2bad3a338ade920152972b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 22 Dec 2021 16:53:50 +0200 Subject: [PATCH 330/361] ALSA: hda/hdmi: Disable silent stream on GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The silent stream stuff recurses back into i915 audio component .get_power() from the .pin_eld_notify() hook. On GLK this will deadlock as i915 may already be holding the relevant modeset locks during .pin_eld_notify() and the GLK audio vs. CDCLK workaround will try to grab the same locks from .get_power(). Until someone comes up with a better fix just disable the silent stream support on GLK. Cc: stable@vger.kernel.org Cc: Harsha Priya Cc: Emmanuel Jillela Cc: Kai Vehmanen Cc: Takashi Iwai Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2623 Fixes: 951894cf30f4 ("ALSA: hda/hdmi: Add Intel silent stream support") Signed-off-by: Ville Syrjälä Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211222145350.24342-1-ville.syrjala@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 415701bd10ac8..ffcde7409d2a5 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2947,7 +2947,8 @@ static int parse_intel_hdmi(struct hda_codec *codec) /* Intel Haswell and onwards; audio component with eld notifier */ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, - const int *port_map, int port_num, int dev_num) + const int *port_map, int port_num, int dev_num, + bool send_silent_stream) { struct hdmi_spec *spec; int err; @@ -2980,7 +2981,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, * Enable silent stream feature, if it is enabled via * module param or Kconfig option */ - if (enable_silent_stream) + if (send_silent_stream) spec->send_silent_stream = true; return parse_intel_hdmi(codec); @@ -2988,12 +2989,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, static int patch_i915_hsw_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x08, NULL, 0, 3); + return intel_hsw_common_init(codec, 0x08, NULL, 0, 3, + enable_silent_stream); } static int patch_i915_glk_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3); + /* + * Silent stream calls audio component .get_power() from + * .pin_eld_notify(). On GLK this will deadlock in i915 due + * to the audio vs. CDCLK workaround. + */ + return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3, false); } static int patch_i915_icl_hdmi(struct hda_codec *codec) @@ -3004,7 +3011,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) */ static const int map[] = {0x0, 0x4, 0x6, 0x8, 0xa, 0xb}; - return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3); + return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3, + enable_silent_stream); } static int patch_i915_tgl_hdmi(struct hda_codec *codec) @@ -3016,7 +3024,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; int ret; - ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4); + ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4, + enable_silent_stream); if (!ret) { struct hdmi_spec *spec = codec->spec; From 385f287f9853da402d94278e59f594501c1d1dad Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:16 +0800 Subject: [PATCH 331/361] ALSA: hda: intel-sdw-acpi: harden detection of controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing code currently sets a pointer to an ACPI handle before checking that it's actually a SoundWire controller. This can lead to issues where the graph walk continues and eventually fails, but the pointer was set already. This patch changes the logic so that the information provided to the caller is set when a controller is found. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-2-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index c0123bc31c0dd..ba8a872a29010 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -132,8 +132,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, return AE_NOT_FOUND; } - info->handle = handle; - /* * On some Intel platforms, multiple children of the HDAS * device can be found, but only one of them is the SoundWire @@ -144,6 +142,9 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE) return AE_OK; /* keep going */ + /* found the correct SoundWire controller */ + info->handle = handle; + /* device found, stop namespace walk */ return AE_CTRL_TERMINATE; } From 78ea40efb48e978756db2ce45fcfa55bac056b91 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:17 +0800 Subject: [PATCH 332/361] ALSA: hda: intel-sdw-acpi: go through HDAS ACPI at max depth of 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the HDAS ACPI scope, the SoundWire may not be the direct child of HDAS. It needs to go through the ACPI table at max depth of 2 to find the SoundWire device from HDAS. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-3-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index ba8a872a29010..b7758dbe23714 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -165,8 +165,14 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle, acpi_status status; info->handle = NULL; + /* + * In the HDAS ACPI scope, 'SNDW' may be either the child of + * 'HDAS' or the grandchild of 'HDAS'. So let's go through + * the ACPI from 'HDAS' at max depth of 2 to find the 'SNDW' + * device. + */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, - parent_handle, 1, + parent_handle, 2, sdw_intel_acpi_cb, NULL, info, NULL); if (ACPI_FAILURE(status) || info->handle == NULL) From 39a8fc4971a00d22536aeb7d446ee4a97810611b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sat, 18 Dec 2021 13:39:25 +0100 Subject: [PATCH 333/361] ALSA: rawmidi - fix the uninitalized user_pversion The user_pversion was uninitialized for the user space file structure in the open function, because the file private structure use kmalloc for the allocation. The kernel ALSA sequencer code clears the file structure, so no additional fixes are required. Cc: stable@kernel.org Cc: broonie@kernel.org BugLink: https://github.com/alsa-project/alsa-lib/issues/178 Fixes: 09d23174402d ("ALSA: rawmidi: introduce SNDRV_RAWMIDI_IOCTL_USER_PVERSION") Reported-by: syzbot+88412ee8811832b00dbe@syzkaller.appspotmail.com Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20211218123925.2583847-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 6f30231bdb884..befa9809ff001 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -447,6 +447,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) err = -ENOMEM; goto __error; } + rawmidi_file->user_pversion = 0; init_waitqueue_entry(&wait, current); add_wait_queue(&rmidi->open_wait, &wait); while (1) { From edca7cc4b0accfa69dc032442fe0684e59c691b8 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 15 Dec 2021 20:16:46 +0100 Subject: [PATCH 334/361] ALSA: hda/realtek: Fix quirk for Clevo NJ51CU The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, but uses the 0x8686 subproduct id in both cases. The ALC256 codec needs a different quirk for the headset microphone working and and edditional quirk for sound working after suspend and resume. When waking up from s3 suspend the Coef 0x10 is set to 0x0220 instead of 0x0020 on the ALC256 codec. Setting the value manually makes the sound work again. This patch does this automatically. [ minor coding style fix by tiwai ] Signed-off-by: Werner Sembach Fixes: b5acfe152abaa ("ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220)") Cc: Link: https://lore.kernel.org/r/20211215191646.844644-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e59ff75eea751..28255e752c4a1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6546,6 +6546,23 @@ static void alc233_fixup_no_audio_jack(struct hda_codec *codec, alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs); } +static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, + * but uses the 0x8686 subproduct id in both cases. The ALC256 codec + * needs an additional quirk for sound working after suspend and resume. + */ + if (codec->core.vendor_id == 0x10ec0256) { + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120); + } else { + snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c); + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6766,6 +6783,7 @@ enum { ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, + ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, }; static const struct hda_fixup alc269_fixups[] = { @@ -8490,6 +8508,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_no_audio_jack, }, + [ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_mic_no_presence_and_resume, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8831,7 +8855,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From bdf1b5c3884f6a0dc91b0dbdb8c3b7d205f449e0 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 20 Dec 2021 21:56:03 +0800 Subject: [PATCH 335/361] sfc: Check null pointer of rx_queue->page_ring Because of the possible failure of the kcalloc, it should be better to set rx_queue->page_ptr_mask to 0 when it happens in order to maintain the consistency. Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Signed-off-by: Jiasheng Jiang Acked-by: Martin Habets Link: https://lore.kernel.org/r/20211220135603.954944-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/rx_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 68fc7d317693b..0983abc0cc5f0 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -150,7 +150,10 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) From 9b8bdd1eb5890aeeab7391dddcf8bd51f7b07216 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 20 Dec 2021 22:03:44 +0800 Subject: [PATCH 336/361] sfc: falcon: Check null pointer of rx_queue->page_ring Because of the possible failure of the kcalloc, it should be better to set rx_queue->page_ptr_mask to 0 when it happens in order to maintain the consistency. Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Signed-off-by: Jiasheng Jiang Acked-by: Martin Habets Link: https://lore.kernel.org/r/20211220140344.978408-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/falcon/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 966f13e7475dd..11a6aee852e92 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -728,7 +728,10 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx, efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue) From 8035b1a2a37a29d8c717ef84fca8fe7278bc9f03 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 21 Dec 2021 23:10:36 +0300 Subject: [PATCH 337/361] asix: fix uninit-value in asix_mdio_read() asix_read_cmd() may read less than sizeof(smsr) bytes and in this case smsr will be uninitialized. Fail log: BUG: KMSAN: uninit-value in asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] BUG: KMSAN: uninit-value in asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] drivers/net/usb/asix_common.c:497 BUG: KMSAN: uninit-value in asix_mdio_read+0x3c1/0xb00 drivers/net/usb/asix_common.c:497 drivers/net/usb/asix_common.c:497 asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] drivers/net/usb/asix_common.c:497 asix_mdio_read+0x3c1/0xb00 drivers/net/usb/asix_common.c:497 drivers/net/usb/asix_common.c:497 Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter") Reported-and-tested-by: syzbot+f44badb06036334e867a@syzkaller.appspotmail.com Reviewed-by: Andrew Lunn Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/8966e3b514edf39857dd93603fc79ec02e000a75.1640117288.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 42ba4af680907..06823d7141b6d 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -77,7 +77,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < 0) + else if (ret < sizeof(smsr)) continue; else if (smsr & AX_HOST_EN) break; From d1652b70d07cc3eed96210c876c4879e1655f20e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 21 Dec 2021 23:10:43 +0300 Subject: [PATCH 338/361] asix: fix wrong return value in asix_check_host_enable() If asix_read_cmd() returns 0 on 30th interation, 0 will be returned from asix_check_host_enable(), which is logically wrong. Fix it by returning -ETIMEDOUT explicitly if we have exceeded 30 iterations Also, replaced 30 with #define as suggested by Andrew Fixes: a786e3195d6a ("net: asix: fix uninit value bugs") Reported-by: Andrew Lunn Signed-off-by: Pavel Skripkin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/ecd3470ce6c2d5697ac635d0d3b14a47defb4acb.1640117288.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 06823d7141b6d..71682970be584 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -9,6 +9,8 @@ #include "asix.h" +#define AX_HOST_EN_RETRIES 30 + int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { @@ -68,7 +70,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) int i, ret; u8 smsr; - for (i = 0; i < 30; ++i) { + for (i = 0; i < AX_HOST_EN_RETRIES; ++i) { ret = asix_set_sw_mii(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) break; @@ -83,7 +85,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) break; } - return ret; + return i >= AX_HOST_EN_RETRIES ? -ETIMEDOUT : ret; } static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) From 7b9762a5e8837b92a027d58d396a9d27f6440c36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Dec 2021 20:26:56 -0700 Subject: [PATCH 339/361] io_uring: zero iocb->ki_pos for stream file types io_uring supports using offset == -1 for using the current file position, and we read that in as part of read/write command setup. For the non-iter read/write types we pass in NULL for the position pointer, but for the iter types we should not be passing any anything but 0 for the position for a stream. Clear kiocb->ki_pos if the file is a stream, don't leave it as -1. If we do, then the request will error with -ESPIPE. Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position") Link: https://github.com/axboe/liburing/discussions/501 Reported-by: Samuel Williams Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5ab0e9a3f291..fb2a0cb4aaf83 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2891,9 +2891,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; + if (kiocb->ki_pos == -1) { + if (!(file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = file->f_pos; + } else { + kiocb->ki_pos = 0; + } } kiocb->ki_flags = iocb_flags(file); ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); From c4499272566d677075c6a84f46baeb826a6a7182 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Wed, 22 Dec 2021 11:51:54 -0700 Subject: [PATCH 340/361] platform/x86: system76_acpi: Guard System76 EC specific functionality Certain functionality or its implementation in System76 EC firmware may be different to the proprietary ODM EC firmware. Introduce a new bool, `has_open_ec`, to guard our specific logic. Detect the use of this by looking for a custom ACPI method name used in System76 firmware. Signed-off-by: Tim Crawford Link: https://lore.kernel.org/r/20211222185154.4560-1-tcrawford@system76.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/system76_acpi.c | 58 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/platform/x86/system76_acpi.c b/drivers/platform/x86/system76_acpi.c index 8b292ee95a144..7299ad08c8387 100644 --- a/drivers/platform/x86/system76_acpi.c +++ b/drivers/platform/x86/system76_acpi.c @@ -35,6 +35,7 @@ struct system76_data { union acpi_object *nfan; union acpi_object *ntmp; struct input_dev *input; + bool has_open_ec; }; static const struct acpi_device_id device_ids[] = { @@ -279,20 +280,12 @@ static struct acpi_battery_hook system76_battery_hook = { static void system76_battery_init(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_register(&system76_battery_hook); + battery_hook_register(&system76_battery_hook); } static void system76_battery_exit(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_unregister(&system76_battery_hook); + battery_hook_unregister(&system76_battery_hook); } // Get the airplane mode LED brightness @@ -673,6 +666,10 @@ static int system76_add(struct acpi_device *acpi_dev) acpi_dev->driver_data = data; data->acpi_dev = acpi_dev; + // Some models do not run open EC firmware. Check for an ACPI method + // that only exists on open EC to guard functionality specific to it. + data->has_open_ec = acpi_has_method(acpi_device_handle(data->acpi_dev), "NFAN"); + err = system76_get(data, "INIT"); if (err) return err; @@ -718,27 +715,31 @@ static int system76_add(struct acpi_device *acpi_dev) if (err) goto error; - err = system76_get_object(data, "NFAN", &data->nfan); - if (err) - goto error; + if (data->has_open_ec) { + err = system76_get_object(data, "NFAN", &data->nfan); + if (err) + goto error; - err = system76_get_object(data, "NTMP", &data->ntmp); - if (err) - goto error; + err = system76_get_object(data, "NTMP", &data->ntmp); + if (err) + goto error; - data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, - "system76_acpi", data, &thermal_chip_info, NULL); - err = PTR_ERR_OR_ZERO(data->therm); - if (err) - goto error; + data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, + "system76_acpi", data, &thermal_chip_info, NULL); + err = PTR_ERR_OR_ZERO(data->therm); + if (err) + goto error; - system76_battery_init(); + system76_battery_init(); + } return 0; error: - kfree(data->ntmp); - kfree(data->nfan); + if (data->has_open_ec) { + kfree(data->ntmp); + kfree(data->nfan); + } return err; } @@ -749,14 +750,15 @@ static int system76_remove(struct acpi_device *acpi_dev) data = acpi_driver_data(acpi_dev); - system76_battery_exit(); + if (data->has_open_ec) { + system76_battery_exit(); + kfree(data->nfan); + kfree(data->ntmp); + } devm_led_classdev_unregister(&acpi_dev->dev, &data->ap_led); devm_led_classdev_unregister(&acpi_dev->dev, &data->kb_led); - kfree(data->nfan); - kfree(data->ntmp); - system76_get(data, "FINI"); return 0; From 4f6c131c3c31b9f68470ebd01320d5403d8719bb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Dec 2021 21:49:41 +0200 Subject: [PATCH 341/361] platform/x86/intel: Remove X86_PLATFORM_DRIVERS_INTEL While introduction of this menu brings a nice view in the configuration tools, it brought more issues than solves, i.e. it prevents to locate files in the intel/ subfolder without touching non-related Kconfig dependencies elsewhere. Drop X86_PLATFORM_DRIVERS_INTEL altogether. Note, on x86 it's enabled by default and it's quite unlikely anybody wants to disable all of the modules in this submenu. Fixes: 8bd836feb6ca ("platform/x86: intel_skl_int3472: Move to intel/ subfolder") Suggested-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211222194941.76054-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/Makefile | 2 +- drivers/platform/x86/intel/Kconfig | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 2194780616839..253a096b5dd8c 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -68,7 +68,7 @@ obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o # Intel -obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL) += intel/ +obj-y += intel/ # MSI obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index 38ce3e3445892..40096b25994af 100644 --- a/drivers/platform/x86/intel/Kconfig +++ b/drivers/platform/x86/intel/Kconfig @@ -3,19 +3,6 @@ # Intel x86 Platform Specific Drivers # -menuconfig X86_PLATFORM_DRIVERS_INTEL - bool "Intel x86 Platform Specific Device Drivers" - default y - help - Say Y here to get to see options for device drivers for - various Intel x86 platforms, including vendor-specific - drivers. This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped - and disabled. - -if X86_PLATFORM_DRIVERS_INTEL - source "drivers/platform/x86/intel/atomisp2/Kconfig" source "drivers/platform/x86/intel/int1092/Kconfig" source "drivers/platform/x86/intel/int33fe/Kconfig" @@ -183,5 +170,3 @@ config INTEL_UNCORE_FREQ_CONTROL To compile this driver as a module, choose M here: the module will be called intel-uncore-frequency. - -endif # X86_PLATFORM_DRIVERS_INTEL From 9695b7de5b4760ed22132aca919570c0190cb0ce Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Dec 2021 19:39:52 +0100 Subject: [PATCH 342/361] veth: ensure skb entering GRO are not cloned. After commit d3256efd8e8b ("veth: allow enabling NAPI even without XDP"), if GRO is enabled on a veth device and TSO is disabled on the peer device, TCP skbs will go through the NAPI callback. If there is no XDP program attached, the veth code does not perform any share check, and shared/cloned skbs could enter the GRO engine. Ignat reported a BUG triggered later-on due to the above condition: [ 53.970529][ C1] kernel BUG at net/core/skbuff.c:3574! [ 53.981755][ C1] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 53.982634][ C1] CPU: 1 PID: 19 Comm: ksoftirqd/1 Not tainted 5.16.0-rc5+ #25 [ 53.982634][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 53.982634][ C1] RIP: 0010:skb_shift+0x13ef/0x23b0 [ 53.982634][ C1] Code: ea 03 0f b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 41 0c 00 00 41 80 7f 02 00 4d 8d b5 d0 00 00 00 0f 85 74 f5 ff ff <0f> 0b 4d 8d 77 20 be 04 00 00 00 4c 89 44 24 78 4c 89 f7 4c 89 8c [ 53.982634][ C1] RSP: 0018:ffff8881008f7008 EFLAGS: 00010246 [ 53.982634][ C1] RAX: 0000000000000000 RBX: ffff8881180b4c80 RCX: 0000000000000000 [ 53.982634][ C1] RDX: 0000000000000002 RSI: ffff8881180b4d3c RDI: ffff88810bc9cac2 [ 53.982634][ C1] RBP: ffff8881008f70b8 R08: ffff8881180b4cf4 R09: ffff8881180b4cf0 [ 53.982634][ C1] R10: ffffed1022999e5c R11: 0000000000000002 R12: 0000000000000590 [ 53.982634][ C1] R13: ffff88810f940c80 R14: ffff88810f940d50 R15: ffff88810bc9cac0 [ 53.982634][ C1] FS: 0000000000000000(0000) GS:ffff888235880000(0000) knlGS:0000000000000000 [ 53.982634][ C1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.982634][ C1] CR2: 00007ff5f9b86680 CR3: 0000000108ce8004 CR4: 0000000000170ee0 [ 53.982634][ C1] Call Trace: [ 53.982634][ C1] [ 53.982634][ C1] tcp_sacktag_walk+0xaba/0x18e0 [ 53.982634][ C1] tcp_sacktag_write_queue+0xe7b/0x3460 [ 53.982634][ C1] tcp_ack+0x2666/0x54b0 [ 53.982634][ C1] tcp_rcv_established+0x4d9/0x20f0 [ 53.982634][ C1] tcp_v4_do_rcv+0x551/0x810 [ 53.982634][ C1] tcp_v4_rcv+0x22ed/0x2ed0 [ 53.982634][ C1] ip_protocol_deliver_rcu+0x96/0xaf0 [ 53.982634][ C1] ip_local_deliver_finish+0x1e0/0x2f0 [ 53.982634][ C1] ip_sublist_rcv_finish+0x211/0x440 [ 53.982634][ C1] ip_list_rcv_finish.constprop.0+0x424/0x660 [ 53.982634][ C1] ip_list_rcv+0x2c8/0x410 [ 53.982634][ C1] __netif_receive_skb_list_core+0x65c/0x910 [ 53.982634][ C1] netif_receive_skb_list_internal+0x5f9/0xcb0 [ 53.982634][ C1] napi_complete_done+0x188/0x6e0 [ 53.982634][ C1] gro_cell_poll+0x10c/0x1d0 [ 53.982634][ C1] __napi_poll+0xa1/0x530 [ 53.982634][ C1] net_rx_action+0x567/0x1270 [ 53.982634][ C1] __do_softirq+0x28a/0x9ba [ 53.982634][ C1] run_ksoftirqd+0x32/0x60 [ 53.982634][ C1] smpboot_thread_fn+0x559/0x8c0 [ 53.982634][ C1] kthread+0x3b9/0x490 [ 53.982634][ C1] ret_from_fork+0x22/0x30 [ 53.982634][ C1] Address the issue by skipping the GRO stage for shared or cloned skbs. To reduce the chance of OoO, try to unclone the skbs before giving up. v1 -> v2: - use avoid skb_copy and fallback to netif_receive_skb - Eric Reported-by: Ignat Korchagin Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Signed-off-by: Paolo Abeni Tested-by: Ignat Korchagin Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/b5f61c5602aab01bac8d711d8d1bfab0a4817db7.1640197544.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 50eb43e5bf459..2acdb8ad6c713 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -879,8 +879,12 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, stats->xdp_bytes += skb->len; skb = veth_xdp_rcv_skb(rq, skb, bq, stats); - if (skb) - napi_gro_receive(&rq->xdp_napi, skb); + if (skb) { + if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) + netif_receive_skb(skb); + else + napi_gro_receive(&rq->xdp_napi, skb); + } } done++; } From ae2778a64724f77fd6cad674461a045fb3307df7 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 23 Dec 2021 15:22:11 +0800 Subject: [PATCH 343/361] net: dsa: tag_ocelot: use traffic class to map priority on injected header For Ocelot switches, the CPU injected frames have an injection header where it can specify the QoS class of the packet and the DSA tag, now it uses the SKB priority to set that. If a traffic class to priority mapping is configured on the netdevice (with mqprio for example ...), it won't be considered for CPU injected headers. This patch make the QoS class aligned to the priority to traffic class mapping if it exists. Fixes: 8dce89aa5f32 ("net: dsa: ocelot: add tagger for Ocelot/Felix switches") Signed-off-by: Xiaoliang Yang Signed-off-by: Marouen Ghodhbane Link: https://lore.kernel.org/r/20211223072211.33130-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_ocelot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index de1c849a0a705..4ed74d509d6ac 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -47,9 +47,13 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, void *injection; __be32 *prefix; u32 rew_op = 0; + u64 qos_class; ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + qos_class = netdev_get_num_tc(netdev) ? + netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; + injection = skb_push(skb, OCELOT_TAG_LEN); prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN); @@ -57,7 +61,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, memset(injection, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(injection, 1); ocelot_ifh_set_src(injection, ds->num_ports); - ocelot_ifh_set_qos_class(injection, skb->priority); + ocelot_ifh_set_qos_class(injection, qos_class); ocelot_ifh_set_vlan_tci(injection, vlan_tci); ocelot_ifh_set_tag_type(injection, tag_type); From eccffcf4657ab9a148faaa0eb354d2a091caf552 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 23 Dec 2021 15:39:28 +0800 Subject: [PATCH 344/361] net: stmmac: ptp: fix potentially overflowing expression Convert the u32 variable to type u64 in a context where expression of type u64 is required to avoid potential overflow. Fixes: e9e3720002f6 ("net: stmmac: ptp: update tas basetime after ptp adjust") Signed-off-by: Xiaoliang Yang Link: https://lore.kernel.org/r/20211223073928.37371-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 580cc035536bd..be9b58b2abf9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -102,7 +102,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) time.tv_nsec = priv->plat->est->btr_reserve[0]; time.tv_sec = priv->plat->est->btr_reserve[1]; basetime = timespec64_to_ktime(time); - cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC + + cycle_time = (u64)priv->plat->est->ctr[1] * NSEC_PER_SEC + priv->plat->est->ctr[0]; time = stmmac_calc_tas_basetime(basetime, current_time_ns, From d95a56207c078e2019cf6659d890ec1e987e8420 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Thu, 23 Dec 2021 16:31:38 +0100 Subject: [PATCH 345/361] net: bridge: fix ioctl old_deviceless bridge argument Commit 561d8352818f ("bridge: use ndo_siocdevprivate") changed the source and destination arguments of copy_{to,from}_user in bridge's old_deviceless() from args[1] to uarg breaking SIOC{G,S}IFBR ioctls. Commit cbd7ad29a507 ("net: bridge: fix ioctl old_deviceless bridge argument") fixed only BRCTL_{ADD,DEL}_BRIDGES commands leaving BRCTL_GET_BRIDGES one untouched. The fixes BRCTL_GET_BRIDGES as well and has been tested with busybox's brctl. Example of broken brctl: $ brctl show bridge name bridge id STP enabled interfaces brctl: can't get bridge name for index 0: No such device or address Example of fixed brctl: $ brctl show bridge name bridge id STP enabled interfaces br0 8000.000000000000 no Fixes: 561d8352818f ("bridge: use ndo_siocdevprivate") Signed-off-by: Remi Pommarel Reviewed-by: Arnd Bergmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/all/20211223153139.7661-2-repk@triplefau.lt/ Signed-off-by: Jakub Kicinski --- net/bridge/br_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index db4ab2c2ce18b..891cfcf45644b 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -337,7 +337,7 @@ static int old_deviceless(struct net *net, void __user *uarg) args[2] = get_bridge_ifindices(net, indices, args[2]); - ret = copy_to_user(uarg, indices, + ret = copy_to_user((void __user *)args[1], indices, array_size(args[2], sizeof(int))) ? -EFAULT : args[2]; From 45bf944e6703d43fe5e285808312acd8a34c1a24 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 23 Dec 2021 17:27:01 +0800 Subject: [PATCH 346/361] r8152: fix the force speed doesn't work for RTL8156 It needs to set mdio force mode. Otherwise, link off always occurs when setting force speed. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f9877a3e83acf..a817dfd5c9eb6 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6584,6 +6584,21 @@ static bool rtl8153_in_nway(struct r8152 *tp) return true; } +static void r8156_mdio_force_mode(struct r8152 *tp) +{ + u16 data; + + /* Select force mode through 0xa5b4 bit 15 + * 0: MDIO force mode + * 1: MMD force mode + */ + data = ocp_reg_read(tp, 0xa5b4); + if (data & BIT(15)) { + data &= ~BIT(15); + ocp_reg_write(tp, 0xa5b4, data); + } +} + static void set_carrier(struct r8152 *tp) { struct net_device *netdev = tp->netdev; @@ -8016,6 +8031,7 @@ static void r8156_init(struct r8152 *tp) ocp_data |= ACT_ODMA; ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data); + r8156_mdio_force_mode(tp); rtl_tally_reset(tp); tp->coalesce = 15000; /* 15 us */ @@ -8145,6 +8161,7 @@ static void r8156b_init(struct r8152 *tp) ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + r8156_mdio_force_mode(tp); rtl_tally_reset(tp); tp->coalesce = 15000; /* 15 us */ From b24edca309535c2d9af86aab95d64065f6ef1d26 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 23 Dec 2021 17:27:02 +0800 Subject: [PATCH 347/361] r8152: sync ocp base There are some chances that the actual base of hardware is different from the value recorded by driver, so we have to reset the variable of ocp_base to sync it. Set ocp_base to -1. Then, it would be updated and the new base would be set to the hardware next time. Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a817dfd5c9eb6..3085e8118d7fa 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "12" /* Information for net */ -#define NET_VERSION "11" +#define NET_VERSION "12" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -4016,6 +4016,11 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) ocp_write_word(tp, type, PLA_BP_BA, 0); } +static inline void rtl_reset_ocp_base(struct r8152 *tp) +{ + tp->ocp_base = -1; +} + static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) { u16 data, check; @@ -4087,8 +4092,6 @@ static int rtl_post_ram_code(struct r8152 *tp, u16 key_addr, bool wait) rtl_phy_patch_request(tp, false, wait); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base); - return 0; } @@ -4800,6 +4803,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, u32 len; u8 *data; + rtl_reset_ocp_base(tp); + if (sram_read(tp, SRAM_GPHY_FW_VER) >= __le16_to_cpu(phy->version)) { dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n"); return; @@ -4845,7 +4850,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, } } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base); + rtl_reset_ocp_base(tp); + rtl_phy_patch_request(tp, false, wait); if (sram_read(tp, SRAM_GPHY_FW_VER) == __le16_to_cpu(phy->version)) @@ -4861,6 +4867,8 @@ static int rtl8152_fw_phy_ver(struct r8152 *tp, struct fw_phy_ver *phy_ver) ver_addr = __le16_to_cpu(phy_ver->ver.addr); ver = __le16_to_cpu(phy_ver->ver.data); + rtl_reset_ocp_base(tp); + if (sram_read(tp, ver_addr) >= ver) { dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n"); return 0; @@ -4877,6 +4885,8 @@ static void rtl8152_fw_phy_fixup(struct r8152 *tp, struct fw_phy_fixup *fix) { u16 addr, data; + rtl_reset_ocp_base(tp); + addr = __le16_to_cpu(fix->setting.addr); data = ocp_reg_read(tp, addr); @@ -4908,6 +4918,8 @@ static void rtl8152_fw_phy_union_apply(struct r8152 *tp, struct fw_phy_union *ph u32 length; int i, num; + rtl_reset_ocp_base(tp); + num = phy->pre_num; for (i = 0; i < num; i++) sram_write(tp, __le16_to_cpu(phy->pre_set[i].addr), @@ -4938,6 +4950,8 @@ static void rtl8152_fw_phy_nc_apply(struct r8152 *tp, struct fw_phy_nc *phy) u32 length, i, num; __le16 *data; + rtl_reset_ocp_base(tp); + mode_reg = __le16_to_cpu(phy->mode_reg); sram_write(tp, mode_reg, __le16_to_cpu(phy->mode_pre)); sram_write(tp, __le16_to_cpu(phy->ba_reg), @@ -5107,6 +5121,7 @@ static void rtl8152_apply_firmware(struct r8152 *tp, bool power_cut) if (rtl_fw->post_fw) rtl_fw->post_fw(tp); + rtl_reset_ocp_base(tp); strscpy(rtl_fw->version, fw_hdr->version, RTL_VER_SIZE); dev_info(&tp->intf->dev, "load %s successfully\n", rtl_fw->version); } @@ -8484,6 +8499,8 @@ static int rtl8152_resume(struct usb_interface *intf) mutex_lock(&tp->control); + rtl_reset_ocp_base(tp); + if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) ret = rtl8152_runtime_resume(tp); else @@ -8499,6 +8516,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); + rtl_reset_ocp_base(tp); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); set_ethernet_addr(tp, true); From 391e5975c0208ce3739587b33eba08be3e473d79 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 23 Dec 2021 16:36:33 +0900 Subject: [PATCH 348/361] net: stmmac: dwmac-visconti: Fix value of ETHER_CLK_SEL_FREQ_SEL_2P5M ETHER_CLK_SEL_FREQ_SEL_2P5M is not 0 bit of the register. This is a value, which is 0. Fix from BIT(0) to 0. Reported-by: Yuji Ishikawa Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver") Signed-off-by: Nobuhiro Iwamatsu Link: https://lore.kernel.org/r/20211223073633.101306-1-nobuhiro1.iwamatsu@toshiba.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 66fc8be34bb71..e2e0f977875d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -26,7 +26,7 @@ #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8)) #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9) #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8) -#define ETHER_CLK_SEL_FREQ_SEL_2P5M BIT(0) +#define ETHER_CLK_SEL_FREQ_SEL_2P5M 0 #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11) From 26a8b09437804fabfb1db080d676b96c0de68e7c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Dec 2021 11:50:23 +0100 Subject: [PATCH 349/361] platform/x86: intel_pmc_core: fix memleak on registration failure In case device registration fails during module initialisation, the platform device structure needs to be freed using platform_device_put() to properly free all resources (e.g. the device name). Fixes: 938835aa903a ("platform/x86: intel_pmc_core: do not create a static struct device") Cc: stable@vger.kernel.org # 5.9 Signed-off-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211222105023.6205-1-johan@kernel.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/pltdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index 73797680b895c..15ca8afdd973d 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -65,7 +65,7 @@ static int __init pmc_core_platform_init(void) retval = platform_device_register(pmc_core_device); if (retval) - kfree(pmc_core_device); + platform_device_put(pmc_core_device); return retval; } From 0129ab1f268b6cf88825eae819b9b84aa0a85634 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 24 Dec 2021 21:12:32 -0800 Subject: [PATCH 350/361] kfence: fix memory leak when cat kfence objects Hulk robot reported a kmemleak problem: unreferenced object 0xffff93d1d8cc02e8 (size 248): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 00 40 85 19 d4 93 ff ff 00 10 00 00 00 00 00 00 .@.............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: seq_open+0x2a/0x80 full_proxy_open+0x167/0x1e0 do_dentry_open+0x1e1/0x3a0 path_openat+0x961/0xa20 do_filp_open+0xae/0x120 do_sys_openat2+0x216/0x2f0 do_sys_open+0x57/0x80 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff93d419854000 (size 4096): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 6b 66 65 6e 63 65 2d 23 32 35 30 3a 20 30 78 30 kfence-#250: 0x0 30 30 30 30 30 30 30 37 35 34 62 64 61 31 32 2d 0000000754bda12- backtrace: seq_read_iter+0x313/0x440 seq_read+0x14b/0x1a0 full_proxy_read+0x56/0x80 vfs_read+0xa5/0x1b0 ksys_read+0xa0/0xf0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 I find that we can easily reproduce this problem with the following commands: cat /sys/kernel/debug/kfence/objects echo scan > /sys/kernel/debug/kmemleak cat /sys/kernel/debug/kmemleak The leaked memory is allocated in the stack below: do_syscall_64 do_sys_open do_dentry_open full_proxy_open seq_open ---> alloc seq_file vfs_read full_proxy_read seq_read seq_read_iter traverse ---> alloc seq_buf And it should have been released in the following process: do_syscall_64 syscall_exit_to_user_mode exit_to_user_mode_prepare task_work_run ____fput __fput full_proxy_release ---> free here However, the release function corresponding to file_operations is not implemented in kfence. As a result, a memory leak occurs. Therefore, the solution to this problem is to implement the corresponding release function. Link: https://lkml.kernel.org/r/20211206133628.2822545-1-libaokun1@huawei.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Baokun Li Reported-by: Hulk Robot Acked-by: Marco Elver Reviewed-by: Kefeng Wang Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Yu Kuai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 09945784df9e6..a19154a8d1964 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -683,6 +683,7 @@ static const struct file_operations objects_fops = { .open = open_objects, .read = seq_read, .llseek = seq_lseek, + .release = seq_release, }; static int __init kfence_debugfs_init(void) From 338635340669d5b317c7e8dcf4fff4a0f3651d87 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Dec 2021 21:12:35 -0800 Subject: [PATCH 351/361] mm: mempolicy: fix THP allocations escaping mempolicy restrictions alloc_pages_vma() may try to allocate THP page on the local NUMA node first: page = __alloc_pages_node(hpage_node, gfp | __GFP_THISNODE | __GFP_NORETRY, order); And if the allocation fails it retries allowing remote memory: if (!page && (gfp & __GFP_DIRECT_RECLAIM)) page = __alloc_pages_node(hpage_node, gfp, order); However, this retry allocation completely ignores memory policy nodemask allowing allocation to escape restrictions. The first appearance of this bug seems to be the commit ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings"). The bug disappeared later in the commit 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask") and reappeared again in slightly different form in the commit 76e654cc91bb ("mm, page_alloc: allow hugepage fallback to remote nodes when madvised") Fix this by passing correct nodemask to the __alloc_pages() call. The demonstration/reproducer of the problem: $ mount -oremount,size=4G,huge=always /dev/shm/ $ echo always > /sys/kernel/mm/transparent_hugepage/defrag $ cat mbind_thp.c #include #include #include #include #include #include #include #include #define SIZE 2ULL << 30 int main(int argc, char **argv) { int fd; unsigned long long i; char *addr; pid_t pid; char buf[100]; unsigned long nodemask = 1; fd = open("/dev/shm/test", O_RDWR|O_CREAT); assert(fd > 0); assert(ftruncate(fd, SIZE) == 0); addr = mmap(NULL, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); assert(mbind(addr, SIZE, MPOL_BIND, &nodemask, 2, MPOL_MF_STRICT|MPOL_MF_MOVE)==0); for (i = 0; i < SIZE; i+=4096) { addr[i] = 1; } pid = getpid(); snprintf(buf, sizeof(buf), "grep shm /proc/%d/numa_maps", pid); system(buf); sleep(10000); return 0; } $ gcc mbind_thp.c -o mbind_thp -lnuma $ numactl -H available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 1918 MB node 0 free: 1595 MB node 1 cpus: 1 3 node 1 size: 2014 MB node 1 free: 1731 MB node distances: node 0 1 0: 10 20 1: 20 10 $ rm -f /dev/shm/test; taskset -c 0 ./mbind_thp 7fd970a00000 bind:0 file=/dev/shm/test dirty=524288 active=0 N0=396800 N1=127488 kernelpagesize_kB=4 Link: https://lkml.kernel.org/r/20211208165343.22349-1-arbn@yandex-team.com Fixes: ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings") Signed-off-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: David Rientjes Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10e9c87260ede..f6248affaf38c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2140,8 +2140,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) - page = __alloc_pages_node(hpage_node, - gfp, order); + page = __alloc_pages(gfp, order, hpage_node, nmask); goto out; } From 71d2bcec2d4d69ff109c497e6611d6c53c8926d4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 24 Dec 2021 21:12:39 -0800 Subject: [PATCH 352/361] kernel/crash_core: suppress unknown crashkernel parameter warning When booting with crashkernel= on the kernel command line a warning similar to Kernel command line: ro console=ttyS0 crashkernel=256M Unknown kernel command line parameters "crashkernel=256M", will be passed to user space. is printed. This comes from crashkernel= being parsed independent from the kernel parameter handling mechanism. So the code in init/main.c doesn't know that crashkernel= is a valid kernel parameter and prints this incorrect warning. Suppress the warning by adding a dummy early_param handler for crashkernel=. Link: https://lkml.kernel.org/r/20211208133443.6867-1-prudo@redhat.com Fixes: 86d1919a4fb0 ("init: print out unknown kernel parameters") Signed-off-by: Philipp Rudo Acked-by: Baoquan He Cc: Andrew Halaney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/crash_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index eb53f5ec62c90..256cf6db573cd 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline, "crashkernel=", suffix_tbl[SUFFIX_LOW]); } +/* + * Add a dummy early_param handler to mark crashkernel= as a known command line + * parameter and suppress incorrect warnings in init/main.c. + */ +static int __init parse_crashkernel_dummy(char *arg) +{ + return 0; +} +early_param("crashkernel", parse_crashkernel_dummy); + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) { From 7e5b901e4609441fc6bb94701c4743b39b6c277e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Dec 2021 21:12:42 -0800 Subject: [PATCH 353/361] MAINTAINERS: mark more list instances as moderated Some lists that are moderated are not marked as moderated consistently, so mark them all as moderated. Link: https://lkml.kernel.org/r/20211209001330.18558-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Miquel Raynal Cc: Conor Culhane Cc: Ryder Lee Cc: Jianjun Wang Cc: Alexandre Belloni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8912b2c1260ca..fb18ce7168aa7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14845,7 +14845,7 @@ PCIE DRIVER FOR MEDIATEK M: Ryder Lee M: Jianjun Wang L: linux-pci@vger.kernel.org -L: linux-mediatek@lists.infradead.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/pci/mediatek* F: drivers/pci/controller/*mediatek* @@ -17423,7 +17423,7 @@ F: drivers/video/fbdev/sm712* SILVACO I3C DUAL-ROLE MASTER M: Miquel Raynal M: Conor Culhane -L: linux-i3c@lists.infradead.org +L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c From e37e7b0b3bd52ec4f8ab71b027bcec08f57f1b3b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 24 Dec 2021 21:12:45 -0800 Subject: [PATCH 354/361] mm, hwpoison: fix condition in free hugetlb page path When a memory error hits a tail page of a free hugepage, __page_handle_poison() is expected to be called to isolate the error in 4kB unit, but it's not called due to the outdated if-condition in memory_failure_hugetlb(). This loses the chance to isolate the error in the finer unit, so it's not optimal. Drop the condition. This "(p != head && TestSetPageHWPoison(head)" condition is based on the old semantics of PageHWPoison on hugepage (where PG_hwpoison flag was set on the subpage), so it's not necessray any more. By getting to set PG_hwpoison on head page for hugepages, concurrent error events on different subpages in a single hugepage can be prevented by TestSetPageHWPoison(head) at the beginning of memory_failure_hugetlb(). So dropping the condition should not reopen the race window originally mentioned in commit b985194c8c0a ("hwpoison, hugetlb: lock_page/unlock_page does not match for handling a free hugepage") [naoya.horiguchi@linux.dev: fix "HardwareCorrupted" counter] Link: https://lkml.kernel.org/r/20211220084851.GA1460264@u2004 Link: https://lkml.kernel.org/r/20211210110208.879740-1-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: Fei Luo Reviewed-by: Mike Kravetz Cc: [5.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 07c875fdeaf0c..682828b94ab60 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1470,17 +1470,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (!(flags & MF_COUNT_INCREASED)) { res = get_hwpoison_page(p, flags); if (!res) { - /* - * Check "filter hit" and "race with other subpage." - */ lock_page(head); - if (PageHWPoison(head)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != head && TestSetPageHWPoison(head))) { + if (hwpoison_filter(p)) { + if (TestClearPageHWPoison(head)) num_poisoned_pages_dec(); - unlock_page(head); - return 0; - } + unlock_page(head); + return 0; } unlock_page(head); res = MF_FAILED; From 94ab10dd42a70acc5208a41325617e3d9cf81a70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 24 Dec 2021 21:12:48 -0800 Subject: [PATCH 355/361] mm: delete unsafe BUG from page_cache_add_speculative() It is not easily reproducible, but on 5.16-rc I have several times hit the VM_BUG_ON_PAGE(PageTail(page), page) in page_cache_add_speculative(): usually from filemap_get_read_batch() for an ext4 read, yesterday from next_uptodate_page() from filemap_map_pages() for a shmem fault. That BUG used to be placed where page_ref_add_unless() had succeeded, but now it is placed before folio_ref_add_unless() is attempted: that is not safe, since it is only the acquired reference which makes the page safe from racing THP collapse or split. We could keep the BUG, checking PageTail only when folio_ref_try_add_rcu() has succeeded; but I don't think it adds much value - just delete it. Link: https://lkml.kernel.org/r/8b98fc6f-3439-8614-c3f3-945c659a1aba@google.com Fixes: 020853b6f5ea ("mm: Add folio_try_get_rcu()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Reviewed-by: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: William Kucharski Cc: Christoph Hellwig Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6052464523051..d150a9082b31c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -285,7 +285,6 @@ static inline struct inode *folio_inode(struct folio *folio) static inline bool page_cache_add_speculative(struct page *page, int count) { - VM_BUG_ON_PAGE(PageTail(page), page); return folio_ref_try_add_rcu((struct folio *)page, count); } From 595ec1973c276f6c0c1de8aca5eef8dfd81f9b49 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 24 Dec 2021 21:12:51 -0800 Subject: [PATCH 356/361] mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid The second parameter of alloc_pages_exact_nid is the one indicating the size of memory pointed by the returned pointer. Link: https://lkml.kernel.org/r/YbjEgwhn4bGblp//@coeus Fixes: abd58f38dfb4 ("mm/page_alloc: add __alloc_size attributes for better bounds checking") Signed-off-by: Thibaut Sautereau Acked-by: Kees Cook Cc: Daniel Micay Cc: Levente Polyak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b976c41772995..8fcc38467af6e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -624,7 +624,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) From 34796417964b8d0aef45a99cf6c2d20cebe33733 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 24 Dec 2021 21:12:54 -0800 Subject: [PATCH 357/361] mm/damon/dbgfs: protect targets destructions with kdamond_lock DAMON debugfs interface iterates current monitoring targets in 'dbgfs_target_ids_read()' while holding the corresponding 'kdamond_lock'. However, it also destructs the monitoring targets in 'dbgfs_before_terminate()' without holding the lock. This can result in a use_after_free bug. This commit avoids the race by protecting the destruction with the corresponding 'kdamond_lock'. Link: https://lkml.kernel.org/r/20211221094447.2241-1-sj@kernel.org Reported-by: Sangwoo Bae Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface") Signed-off-by: SeongJae Park Cc: [5.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 1efac0022e9a4..4fbd729edc9e7 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -650,10 +650,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx) if (!targetid_is_pid(ctx)) return; + mutex_lock(&ctx->kdamond_lock); damon_for_each_target_safe(t, next, ctx) { put_pid((struct pid *)t->id); damon_destroy_target(t); } + mutex_unlock(&ctx->kdamond_lock); } static struct damon_ctx *dbgfs_new_ctx(void) From 2a57d83c78f889bf3f54eede908d0643c40d5418 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 24 Dec 2021 21:12:58 -0800 Subject: [PATCH 358/361] mm/hwpoison: clear MF_COUNT_INCREASED before retrying get_any_page() Hulk Robot reported a panic in put_page_testzero() when testing madvise() with MADV_SOFT_OFFLINE. The BUG() is triggered when retrying get_any_page(). This is because we keep MF_COUNT_INCREASED flag in second try but the refcnt is not increased. page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:737! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 5 PID: 2135 Comm: sshd Tainted: G B 5.16.0-rc6-dirty #373 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: release_pages+0x53f/0x840 Call Trace: free_pages_and_swap_cache+0x64/0x80 tlb_flush_mmu+0x6f/0x220 unmap_page_range+0xe6c/0x12c0 unmap_single_vma+0x90/0x170 unmap_vmas+0xc4/0x180 exit_mmap+0xde/0x3a0 mmput+0xa3/0x250 do_exit+0x564/0x1470 do_group_exit+0x3b/0x100 __do_sys_exit_group+0x13/0x20 __x64_sys_exit_group+0x16/0x20 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Modules linked in: ---[ end trace e99579b570fe0649 ]--- RIP: 0010:release_pages+0x53f/0x840 Link: https://lkml.kernel.org/r/20211221074908.3910286-1-liushixin2@huawei.com Fixes: b94e02822deb ("mm,hwpoison: try to narrow window race for free pages") Signed-off-by: Liu Shixin Reported-by: Hulk Robot Reviewed-by: Oscar Salvador Acked-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 682828b94ab60..3a274468f193e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2234,6 +2234,7 @@ int soft_offline_page(unsigned long pfn, int flags) } else if (ret == 0) { if (soft_offline_free_page(page) && try_again) { try_again = false; + flags &= ~MF_COUNT_INCREASED; goto retry; } } From fc74e0a40e4f9fd0468e34045b0c45bba11dcbb2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Dec 2021 13:17:17 -0800 Subject: [PATCH 359/361] Linux 5.16-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d85f1ff79f5ca..17b4319ad2ff2 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Gobble Gobble # *DOCUMENTATION* From f34e8875ae244462711e31fcc4a82db13a16d36f Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 22 Nov 2021 09:54:00 -0600 Subject: [PATCH 360/361] dt-bindings: spi: cadence-quadspi: document "intel,socfpga-qspi" The QSPI controller on Intel's SoCFPGA platform does not implement the CQSPI_REG_WR_COMPLETION_CTRL register, thus a write to this register results in a crash. Introduce the dts compatible "intel,socfpga-qspi" to differentiate the hardware. Acked-by: Pratyush Yadav Reviewed-by: Rob Herring Signed-off-by: Dinh Nguyen --- v3: revert to "intel,socfpga-qspi" v2: change binding to "cdns,qspi-nor-0010" to be more generic for other platforms --- Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml index ca155abbda7a3..037f41f58503c 100644 --- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml +++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml @@ -29,6 +29,7 @@ properties: - ti,am654-ospi - intel,lgm-qspi - xlnx,versal-ospi-1.0 + - intel,socfpga-qspi - const: cdns,qspi-nor - const: cdns,qspi-nor From 36de991e93908f7ad5c2a0eac9c4ecf8b723fa4a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 22 Nov 2021 09:10:03 -0600 Subject: [PATCH 361/361] ARM: dts: socfpga: change qspi to "intel,socfpga-qspi" Because of commit 9cb2ff111712 ("spi: cadence-quadspi: Disable Auto-HW polling"), which does a write to the CQSPI_REG_WR_COMPLETION_CTRL register regardless of any condition. Well, the Cadence QuadSPI controller on Intel's SoCFPGA platforms does not implement the CQSPI_REG_WR_COMPLETION_CTRL register, thus a write to this register results in a crash! So starting with v5.16, I introduced the patch 98d948eb833 ("spi: cadence-quadspi: fix write completion support"), which adds the dts compatible "intel,socfpga-qspi" that is specific for versions that doesn't have the CQSPI_REG_WR_COMPLETION_CTRL register implemented. Signed-off-by: Dinh Nguyen --- v3: revert back to "intel,socfpga-qspi" v2: use both "cdns,qspi-nor" and "cdns,qspi-nor-0010" --- arch/arm/boot/dts/socfpga.dtsi | 2 +- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +- arch/arm64/boot/dts/intel/socfpga_agilex.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 0b021eef0b538..7c1d6423d7f8c 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -782,7 +782,7 @@ }; qspi: spi@ff705000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff705000 0x1000>, diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index a574ea91d9d3f..3ba431dfa8c94 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -756,7 +756,7 @@ }; qspi: spi@ff809000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff809000 0x100>, diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index d301ac0d406bf..3ec301bd08a91 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -594,7 +594,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 163f33b46e4f7..0dd2d2ee765aa 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -628,7 +628,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>,