From 10f3f10f202109e3841c0c75add5a743ca197205 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:39 -0700 Subject: [PATCH 001/239] MAINTAINERS: git://github -> https://github.com for broadcom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Acked-by: William Zhang Reviewed-by: Philippe Mathieu-Daudé Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Florian Fainelli --- MAINTAINERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f185023724..83612d907bed7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3984,7 +3984,7 @@ M: Rafał Miłecki R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml F: arch/arm64/boot/dts/broadcom/bcmbca/* N: bcmbca @@ -4009,7 +4009,7 @@ R: Broadcom internal kernel review list L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: drivers/pci/controller/pcie-brcmstb.c F: drivers/staging/vc04_services @@ -4023,7 +4023,7 @@ M: Ray Jui M: Scott Branden R: Broadcom internal kernel review list S: Maintained -T: git git://github.com/broadcom/mach-bcm +T: git https://github.com/broadcom/mach-bcm F: arch/arm/mach-bcm/ N: bcm281* N: bcm113* @@ -4088,7 +4088,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: arch/arm/boot/dts/bcm7*.dts* F: arch/arm/include/asm/hardware/cache-b15-rac.h @@ -4120,7 +4120,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-mips@vger.kernel.org S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: arch/mips/bmips/* F: arch/mips/boot/dts/brcm/bcm*.dts* F: arch/mips/include/asm/mach-bmips/* @@ -4259,7 +4259,7 @@ M: Scott Branden R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: arch/arm64/boot/dts/broadcom/northstar2/* F: arch/arm64/boot/dts/broadcom/stingray/* F: drivers/clk/bcm/clk-ns* @@ -4329,7 +4329,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-pm@vger.kernel.org S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h From 2ff4ba9e37024735f5cefc5ea2a73fc66addfe0e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 29 Sep 2022 21:55:21 +0200 Subject: [PATCH 002/239] clk: rs9: Fix I2C accessors Add custom I2C accessors to this driver, since the regular I2C regmap ones do not generate the exact I2C transfers required by the chip. On I2C write, it is mandatory to send transfer length first, on read the chip returns the transfer length in first byte. Instead of always reading back 8 bytes, which is the default and also the size of the entire register file, set BCP register to 1 to read out 1 byte which is less wasteful. Fixes: 892e0ddea1aa ("clk: rs9: Add Renesas 9-series PCIe clock generator driver") Reported-by: Alexander Stein Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220929195521.284497-1-marex@denx.de Reviewed-by: Alexander Stein Signed-off-by: Stephen Boyd --- drivers/clk/clk-renesas-pcie.c | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-renesas-pcie.c b/drivers/clk/clk-renesas-pcie.c index 4f5df1fc74b46..e6247141d0c05 100644 --- a/drivers/clk/clk-renesas-pcie.c +++ b/drivers/clk/clk-renesas-pcie.c @@ -90,13 +90,66 @@ static const struct regmap_access_table rs9_writeable_table = { .n_yes_ranges = ARRAY_SIZE(rs9_writeable_ranges), }; +static int rs9_regmap_i2c_write(void *context, + unsigned int reg, unsigned int val) +{ + struct i2c_client *i2c = context; + const u8 data[3] = { reg, 1, val }; + const int count = ARRAY_SIZE(data); + int ret; + + ret = i2c_master_send(i2c, data, count); + if (ret == count) + return 0; + else if (ret < 0) + return ret; + else + return -EIO; +} + +static int rs9_regmap_i2c_read(void *context, + unsigned int reg, unsigned int *val) +{ + struct i2c_client *i2c = context; + struct i2c_msg xfer[2]; + u8 txdata = reg; + u8 rxdata[2]; + int ret; + + xfer[0].addr = i2c->addr; + xfer[0].flags = 0; + xfer[0].len = 1; + xfer[0].buf = (void *)&txdata; + + xfer[1].addr = i2c->addr; + xfer[1].flags = I2C_M_RD; + xfer[1].len = 2; + xfer[1].buf = (void *)rxdata; + + ret = i2c_transfer(i2c->adapter, xfer, 2); + if (ret < 0) + return ret; + if (ret != 2) + return -EIO; + + /* + * Byte 0 is transfer length, which is always 1 due + * to BCP register programming to 1 in rs9_probe(), + * ignore it and use data from Byte 1. + */ + *val = rxdata[1]; + return 0; +} + static const struct regmap_config rs9_regmap_config = { .reg_bits = 8, .val_bits = 8, - .cache_type = REGCACHE_FLAT, - .max_register = 0x8, + .cache_type = REGCACHE_NONE, + .max_register = RS9_REG_BCP, .rd_table = &rs9_readable_table, .wr_table = &rs9_writeable_table, + .reg_write = rs9_regmap_i2c_write, + .reg_read = rs9_regmap_i2c_read, }; static int rs9_get_output_config(struct rs9_driver_data *rs9, int idx) @@ -242,11 +295,17 @@ static int rs9_probe(struct i2c_client *client) return ret; } - rs9->regmap = devm_regmap_init_i2c(client, &rs9_regmap_config); + rs9->regmap = devm_regmap_init(&client->dev, NULL, + client, &rs9_regmap_config); if (IS_ERR(rs9->regmap)) return dev_err_probe(&client->dev, PTR_ERR(rs9->regmap), "Failed to allocate register map\n"); + /* Always read back 1 Byte via I2C */ + ret = regmap_write(rs9->regmap, RS9_REG_BCP, 1); + if (ret < 0) + return ret; + /* Register clock */ for (i = 0; i < rs9->chip_info->num_clks; i++) { snprintf(name, 5, "DIF%d", i); From 7e3e6e1b75c9643e25e8ca7d6caf1b1faf8f022e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 5 Oct 2022 18:13:44 +0100 Subject: [PATCH 003/239] clk: sifive: select by default if SOC_SIFIVE With the aim of dropping direct selects of drivers from Kconfig.socs, default the SiFive clock drivers to the value of SOC_SIFIVE. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221005171348.167476-2-conor@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/sifive/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sifive/Kconfig b/drivers/clk/sifive/Kconfig index 9132c3c4aa868..b7fde0aadfcbb 100644 --- a/drivers/clk/sifive/Kconfig +++ b/drivers/clk/sifive/Kconfig @@ -2,7 +2,8 @@ menuconfig CLK_SIFIVE bool "SiFive SoC driver support" - depends on RISCV || COMPILE_TEST + depends on SOC_SIFIVE || COMPILE_TEST + default SOC_SIFIVE help SoC drivers for SiFive Linux-capable SoCs. @@ -10,6 +11,7 @@ if CLK_SIFIVE config CLK_SIFIVE_PRCI bool "PRCI driver for SiFive SoCs" + default SOC_SIFIVE select RESET_CONTROLLER select RESET_SIMPLE select CLK_ANALOGBITS_WRPLL_CLN28HPC From 8fbf8636cd37b821ce3482748340008dbbe2dcb5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sun, 9 Oct 2022 10:50:56 +0800 Subject: [PATCH 004/239] clk: mediatek: clk-mt8195-topckgen: Fix error return code in clk_mt8195_topck_probe() If devm_clk_hw_register_mux() fails in clk_mt8195_topck_probe(), it should return error code. Fixes: deeb2af77cf6 ("clk: mediatek: clk-mt8195-topckgen: Register mfg_ck_fast_ref as generic mux") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221009025056.35311-1-yangyingliang@huawei.com Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8195-topckgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index 8cbab5ca2e581..1e016329c1d23 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1270,8 +1270,10 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) hw = devm_clk_hw_register_mux(&pdev->dev, "mfg_ck_fast_ref", mfg_fast_parents, ARRAY_SIZE(mfg_fast_parents), CLK_SET_RATE_PARENT, (base + 0x250), 8, 1, 0, &mt8195_clk_lock); - if (IS_ERR(hw)) + if (IS_ERR(hw)) { + r = PTR_ERR(hw); goto unregister_muxes; + } top_clk_data->hws[CLK_TOP_MFG_CK_FAST_REF] = hw; r = clk_mt8195_reg_mfg_mux_notifier(&pdev->dev, From ba5284ebe497044f37c9bb9c7b1564932f4b6610 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 7 Oct 2022 15:10:00 +0200 Subject: [PATCH 005/239] clk: renesas: r8a779g0: Add SASYNCPER clocks On R-Car V4H, all PLLs except PLL5 support Spread Spectrum and/or Fractional Multiplication to reduce electromagnetic interference. Add the SASYNCPER and SASYNCPERD[124] clocks, which are used as clock sources for modules that must not be affected by Spread Spectrum and/or Fractional Multiplication. Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/d0f35c35e1f96c5a649ab477e7ba5d8025957cd0.1665147497.git.geert+renesas@glider.be --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index 9641122133b54..3e8c93facfa14 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -47,6 +47,7 @@ enum clk_ids { CLK_S0_VIO, CLK_S0_VC, CLK_S0_HSC, + CLK_SASYNCPER, CLK_SV_VIP, CLK_SV_IR, CLK_SDSRC, @@ -84,6 +85,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED(".s0_vio", CLK_S0_VIO, CLK_PLL1_DIV2, 2, 1), DEF_FIXED(".s0_vc", CLK_S0_VC, CLK_PLL1_DIV2, 2, 1), DEF_FIXED(".s0_hsc", CLK_S0_HSC, CLK_PLL1_DIV2, 2, 1), + DEF_FIXED(".sasyncper", CLK_SASYNCPER, CLK_PLL5_DIV4, 3, 1), DEF_FIXED(".sv_vip", CLK_SV_VIP, CLK_PLL1, 5, 1), DEF_FIXED(".sv_ir", CLK_SV_IR, CLK_PLL1, 5, 1), DEF_BASE(".sdsrc", CLK_SDSRC, CLK_TYPE_GEN4_SDSRC, CLK_PLL5), @@ -128,6 +130,9 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED("s0d4_hsc", R8A779G0_CLK_S0D4_HSC, CLK_S0_HSC, 4, 1), DEF_FIXED("cl16m_hsc", R8A779G0_CLK_CL16M_HSC, CLK_S0_HSC, 48, 1), DEF_FIXED("s0d2_cc", R8A779G0_CLK_S0D2_CC, CLK_S0, 2, 1), + DEF_FIXED("sasyncperd1",R8A779G0_CLK_SASYNCPERD1, CLK_SASYNCPER,1, 1), + DEF_FIXED("sasyncperd2",R8A779G0_CLK_SASYNCPERD2, CLK_SASYNCPER,2, 1), + DEF_FIXED("sasyncperd4",R8A779G0_CLK_SASYNCPERD4, CLK_SASYNCPER,4, 1), DEF_FIXED("svd1_ir", R8A779G0_CLK_SVD1_IR, CLK_SV_IR, 1, 1), DEF_FIXED("svd2_ir", R8A779G0_CLK_SVD2_IR, CLK_SV_IR, 2, 1), DEF_FIXED("svd1_vip", R8A779G0_CLK_SVD1_VIP, CLK_SV_VIP, 1, 1), From 97cf79677ecb50a38517253ae2fd705849a7e51a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 16 Oct 2022 17:54:40 -0700 Subject: [PATCH 006/239] xfs: avoid a UAF when log intent item recovery fails KASAN reported a UAF bug when I was running xfs/235: BUG: KASAN: use-after-free in xlog_recover_process_intents+0xa77/0xae0 [xfs] Read of size 8 at addr ffff88804391b360 by task mount/5680 CPU: 2 PID: 5680 Comm: mount Not tainted 6.0.0-xfsx #6.0.0 77e7b52a4943a975441e5ac90a5ad7748b7867f6 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 print_report.cold+0x2cc/0x682 kasan_report+0xa3/0x120 xlog_recover_process_intents+0xa77/0xae0 [xfs fb841c7180aad3f8359438576e27867f5795667e] xlog_recover_finish+0x7d/0x970 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_log_mount_finish+0x2d7/0x5d0 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_mountfs+0x11d4/0x1d10 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_fs_fill_super+0x13d5/0x1a80 [xfs fb841c7180aad3f8359438576e27867f5795667e] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7ff5bc069eae Code: 48 8b 0d 85 1f 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 52 1f 0f 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe433fd448 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ff5bc069eae RDX: 00005575d7213290 RSI: 00005575d72132d0 RDI: 00005575d72132b0 RBP: 00005575d7212fd0 R08: 00005575d7213230 R09: 00005575d7213fe0 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00005575d7213290 R14: 00005575d72132b0 R15: 00005575d7212fd0 Allocated by task 5680: kasan_save_stack+0x1e/0x40 __kasan_slab_alloc+0x66/0x80 kmem_cache_alloc+0x152/0x320 xfs_rui_init+0x17a/0x1b0 [xfs] xlog_recover_rui_commit_pass2+0xb9/0x2e0 [xfs] xlog_recover_items_pass2+0xe9/0x220 [xfs] xlog_recover_commit_trans+0x673/0x900 [xfs] xlog_recovery_process_trans+0xbe/0x130 [xfs] xlog_recover_process_data+0x103/0x2a0 [xfs] xlog_do_recovery_pass+0x548/0xc60 [xfs] xlog_do_log_recovery+0x62/0xc0 [xfs] xlog_do_recover+0x73/0x480 [xfs] xlog_recover+0x229/0x460 [xfs] xfs_log_mount+0x284/0x640 [xfs] xfs_mountfs+0xf8b/0x1d10 [xfs] xfs_fs_fill_super+0x13d5/0x1a80 [xfs] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 5680: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x144/0x1b0 slab_free_freelist_hook+0xab/0x180 kmem_cache_free+0x1f1/0x410 xfs_rud_item_release+0x33/0x80 [xfs] xfs_trans_free_items+0xc3/0x220 [xfs] xfs_trans_cancel+0x1fa/0x590 [xfs] xfs_rui_item_recover+0x913/0xd60 [xfs] xlog_recover_process_intents+0x24e/0xae0 [xfs] xlog_recover_finish+0x7d/0x970 [xfs] xfs_log_mount_finish+0x2d7/0x5d0 [xfs] xfs_mountfs+0x11d4/0x1d10 [xfs] xfs_fs_fill_super+0x13d5/0x1a80 [xfs] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The buggy address belongs to the object at ffff88804391b300 which belongs to the cache xfs_rui_item of size 688 The buggy address is located 96 bytes inside of 688-byte region [ffff88804391b300, ffff88804391b5b0) The buggy address belongs to the physical page: page:ffffea00010e4600 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888043919320 pfn:0x43918 head:ffffea00010e4600 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x4fff80000010200(slab|head|node=1|zone=1|lastcpupid=0xfff) raw: 04fff80000010200 0000000000000000 dead000000000122 ffff88807f0eadc0 raw: ffff888043919320 0000000080140010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88804391b200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88804391b280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88804391b300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88804391b380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88804391b400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The test fuzzes an rmap btree block and starts writer threads to induce a filesystem shutdown on the corrupt block. When the filesystem is remounted, recovery will try to replay the committed rmap intent item, but the corruption problem causes the recovery transaction to fail. Cancelling the transaction frees the RUD, which frees the RUI that we recovered. When we return to xlog_recover_process_intents, @lip is now a dangling pointer, and we cannot use it to find the iop_recover method for the tracepoint. Hence we must store the item ops before calling ->iop_recover if we want to give it to the tracepoint so that the trace data will tell us exactly which intent item failed. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log_recover.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 17e923b9c5fa2..322eb2ee6c550 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2552,6 +2552,8 @@ xlog_recover_process_intents( for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; lip = xfs_trans_ail_cursor_next(ailp, &cur)) { + const struct xfs_item_ops *ops; + if (!xlog_item_is_intent(lip)) break; @@ -2567,13 +2569,17 @@ xlog_recover_process_intents( * deferred ops, you /must/ attach them to the capture list in * the recover routine or else those subsequent intents will be * replayed in the wrong order! + * + * The recovery function can free the log item, so we must not + * access lip after it returns. */ spin_unlock(&ailp->ail_lock); - error = lip->li_ops->iop_recover(lip, &capture_list); + ops = lip->li_ops; + error = ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); if (error) { trace_xlog_intent_recovery_failed(log->l_mp, error, - lip->li_ops->iop_recover); + ops->iop_recover); break; } } From 091873e47ef700e935aa80079b63929af599a0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 19 Oct 2022 22:05:36 +0200 Subject: [PATCH 007/239] selftests/landlock: Build without static libraries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only (forced) static test binary doesn't depend on libcap. Because using -lcap on systems that don't have such static library would fail (e.g. on Arch Linux), let's be more specific and require only dynamic libcap linking. Fixes: a52540522c95 ("selftests/landlock: Fix out-of-tree builds") Cc: Anders Roxell Cc: Guillaume Tucker Cc: Mark Brown Cc: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20221019200536.2771316-1-mic@digikod.net --- tools/testing/selftests/landlock/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 6632bfff486b8..348e2dbdb4e0b 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -3,7 +3,6 @@ # First run: make -C ../../../.. headers_install CFLAGS += -Wall -O2 $(KHDR_INCLUDES) -LDLIBS += -lcap LOCAL_HDRS += common.h @@ -13,10 +12,12 @@ TEST_GEN_PROGS := $(src_test:.c=) TEST_GEN_PROGS_EXTENDED := true -# Static linking for short targets: +# Short targets: +$(TEST_GEN_PROGS): LDLIBS += -lcap $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static include ../lib.mk -# Static linking for targets with $(OUTPUT)/ prefix: +# Targets with $(OUTPUT)/ prefix: +$(TEST_GEN_PROGS): LDLIBS += -lcap $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static From 9fa248c65bdbf5af0a2f74dd38575acfc8dfd2bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Oct 2022 17:18:58 +0200 Subject: [PATCH 008/239] fuse: fix readdir cache race There's a race in fuse's readdir cache that can result in an uninitilized page being read. The page lock is supposed to prevent this from happening but in the following case it doesn't: Two fuse_add_dirent_to_cache() start out and get the same parameters (size=0,offset=0). One of them wins the race to create and lock the page, after which it fills in data, sets rdc.size and unlocks the page. In the meantime the page gets evicted from the cache before the other instance gets to run. That one also creates the page, but finds the size to be mismatched, bails out and leaves the uninitialized page in the cache. Fix by marking a filled page uptodate and ignoring non-uptodate pages. Reported-by: Frank Sorenson Fixes: 5d7bc7e8680c ("fuse: allow using readdir cache") Cc: # v4.20 Signed-off-by: Miklos Szeredi --- fs/fuse/readdir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index b4e5657110457..e8deaacf1832a 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, goto unlock; addr = kmap_local_page(page); - if (!offset) + if (!offset) { clear_page(addr); + SetPageUptodate(page); + } memcpy(addr + offset, dirent, reclen); kunmap_local(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; @@ -516,6 +518,12 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) page = find_get_page_flags(file->f_mapping, index, FGP_ACCESSED | FGP_LOCK); + /* Page gone missing, then re-added to cache, but not initialized? */ + if (page && !PageUptodate(page)) { + unlock_page(page); + put_page(page); + page = NULL; + } spin_lock(&fi->rdc.lock); if (!page) { /* From 13cf24e00665c9751951a422756d975812b71173 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Tue, 18 Oct 2022 14:32:35 -0700 Subject: [PATCH 009/239] xfs: fix exception caused by unexpected illegal bestcount in leaf dir For leaf dir, In most cases, there should be as many bestfree slots as the dir data blocks that can fit under i_size (except for [1]). Root cause is we don't examin the number bestfree slots, when the slots number less than dir data blocks, if we need to allocate new dir data block and update the bestfree array, we will use the dir block number as index to assign bestfree array, while we did not check the leaf buf boundary which may cause UAF or other memory access problem. This issue can also triggered with test cases xfs/473 from fstests. According to Dave Chinner & Darrick's suggestion, adding buffer verifier to detect this abnormal situation in time. Simplify the testcase for fstest xfs/554 [1] The error log is shown as follows: ================================================================== BUG: KASAN: use-after-free in xfs_dir2_leaf_addname+0x1995/0x1ac0 Write of size 2 at addr ffff88810168b000 by task touch/1552 CPU: 5 PID: 1552 Comm: touch Not tainted 6.0.0-rc3+ #101 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x4d/0x66 print_report.cold+0xf6/0x691 kasan_report+0xa8/0x120 xfs_dir2_leaf_addname+0x1995/0x1ac0 xfs_dir_createname+0x58c/0x7f0 xfs_create+0x7af/0x1010 xfs_generic_create+0x270/0x5e0 path_openat+0x270b/0x3450 do_filp_open+0x1cf/0x2b0 do_sys_openat2+0x46b/0x7a0 do_sys_open+0xb7/0x130 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fe4d9e9312b Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b RDX: 0000000000000941 RSI: 00007ffda4c17f33 RDI: 00000000ffffff9c RBP: 00007ffda4c17f33 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941 R13: 00007fe4d9f631a4 R14: 00007ffda4c17f33 R15: 0000000000000000 The buggy address belongs to the physical page: page:ffffea000405a2c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10168b flags: 0x2fffff80000000(node=0|zone=2|lastcpupid=0x1fffff) raw: 002fffff80000000 ffffea0004057788 ffffea000402dbc8 0000000000000000 raw: 0000000000000000 0000000000170000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88810168af00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88810168af80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff88810168b000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88810168b080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88810168b100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint 00000000: 58 44 44 33 5b 53 35 c2 00 00 00 00 00 00 00 78 XDD3[S5........x XFS (sdb): Internal error xfs_dir2_data_use_free at line 1200 of file fs/xfs/libxfs/xfs_dir2_data.c. Caller xfs_dir2_data_use_free+0x28a/0xeb0 CPU: 5 PID: 1552 Comm: touch Tainted: G B 6.0.0-rc3+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x4d/0x66 xfs_corruption_error+0x132/0x150 xfs_dir2_data_use_free+0x198/0xeb0 xfs_dir2_leaf_addname+0xa59/0x1ac0 xfs_dir_createname+0x58c/0x7f0 xfs_create+0x7af/0x1010 xfs_generic_create+0x270/0x5e0 path_openat+0x270b/0x3450 do_filp_open+0x1cf/0x2b0 do_sys_openat2+0x46b/0x7a0 do_sys_open+0xb7/0x130 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fe4d9e9312b Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b RDX: 0000000000000941 RSI: 00007ffda4c17f46 RDI: 00000000ffffff9c RBP: 00007ffda4c17f46 R08: 0000000000000000 R09: 0000000000000001 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941 R13: 00007fe4d9f631a4 R14: 00007ffda4c17f46 R15: 0000000000000000 XFS (sdb): Corruption detected. Unmount and run xfs_repair [1] https://lore.kernel.org/all/20220928095355.2074025-1-guoxuenan@huawei.com/ Reviewed-by: Hou Tao Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_dir2_leaf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index d9b66306a9a77..cb9e950a911d8 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; + bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR3_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(geo, leaf); @@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int( return __this_address; /* Leaves and bests don't overlap in leaf format. */ - if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR3_LEAF1_MAGIC) && + if (isleaf1 && (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) return __this_address; @@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int( } if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; + if (isleaf1 && xfs_dir2_dataptr_to_db(geo, + be32_to_cpu(hdr->ents[i].address)) >= + be32_to_cpu(ltp->bestcount)) + return __this_address; } if (hdr->stale != stale) return __this_address; From fc93812c725068e6a491ce574f058a4530130c00 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Oct 2022 14:37:39 -0700 Subject: [PATCH 010/239] xfs: remove redundant pointer lip The assignment to pointer lip is not really required, the pointer lip is redundant and can be removed. Cleans up clang-scan warning: warning: Although the value stored to 'lip' is used in the enclosing expression, the value is never actually read from 'lip' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_trans_ail.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 16fbf2a1144c1..f51df7d94ef74 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -730,11 +730,10 @@ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { - struct xfs_log_item *lip; DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); - while ((lip = xfs_ail_max(ailp)) != NULL) { + while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); From cf4f4c12dea7a977a143c8fe5af1740b7f9876f8 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 18 Oct 2022 14:38:14 -0700 Subject: [PATCH 011/239] xfs: fix memory leak in xfs_errortag_init When `xfs_sysfs_init` returns failed, `mp->m_errortag` needs to free. Otherwise kmemleak would report memory leak after mounting xfs image: unreferenced object 0xffff888101364900 (size 192): comm "mount", pid 13099, jiffies 4294915218 (age 335.207s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f08ad25c>] __kmalloc+0x41/0x1b0 [<00000000dca9aeb6>] kmem_alloc+0xfd/0x430 [<0000000040361882>] xfs_errortag_init+0x20/0x110 [<00000000b384a0f6>] xfs_mountfs+0x6ea/0x1a30 [<000000003774395d>] xfs_fs_fill_super+0xe10/0x1a80 [<000000009cf07b6c>] get_tree_bdev+0x3e7/0x700 [<00000000046b5426>] vfs_get_tree+0x8e/0x2e0 [<00000000952ec082>] path_mount+0xf8c/0x1990 [<00000000beb1f838>] do_mount+0xee/0x110 [<000000000e9c41bb>] __x64_sys_mount+0x14b/0x1f0 [<00000000f7bb938e>] do_syscall_64+0x3b/0x90 [<000000003fcd67a9>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: c68401011522 ("xfs: expose errortag knobs via sysfs") Signed-off-by: Zeng Heng Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_error.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 7db588ed0be59..c6b2aabd6f187 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -234,13 +234,18 @@ int xfs_errortag_init( struct xfs_mount *mp) { + int ret; + mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, KM_MAYFAIL); if (!mp->m_errortag) return -ENOMEM; - return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, - &mp->m_kobj, "errortag"); + ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + &mp->m_kobj, "errortag"); + if (ret) + kmem_free(mp->m_errortag); + return ret; } void From d08af40340cad0e025d643c3982781a8f99d5032 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Tue, 18 Oct 2022 14:38:29 -0700 Subject: [PATCH 012/239] xfs: Fix unreferenced object reported by kmemleak in xfs_sysfs_init() kmemleak reported a sequence of memory leaks, and one of them indicated we failed to free a pointer: comm "mount", pid 19610, jiffies 4297086464 (age 60.635s) hex dump (first 8 bytes): 73 64 61 00 81 88 ff ff sda..... backtrace: [<00000000d77f3e04>] kstrdup_const+0x46/0x70 [<00000000e51fa804>] kobject_set_name_vargs+0x2f/0xb0 [<00000000247cd595>] kobject_init_and_add+0xb0/0x120 [<00000000f9139aaf>] xfs_mountfs+0x367/0xfc0 [<00000000250d3caf>] xfs_fs_fill_super+0xa16/0xdc0 [<000000008d873d38>] get_tree_bdev+0x256/0x390 [<000000004881f3fa>] vfs_get_tree+0x41/0xf0 [<000000008291ab52>] path_mount+0x9b3/0xdd0 [<0000000022ba8f2d>] __x64_sys_mount+0x190/0x1d0 As mentioned in kobject_init_and_add() comment, if this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Apparently, xfs_sysfs_init() does not follow such a requirement. When kobject_init_and_add() returns an error, the space of kobj->kobject.name alloced by kstrdup_const() is unfree, which will cause the above stack. Fix it by adding kobject_put() when kobject_init_and_add returns an error. Fixes: a31b1d3d89e4 ("xfs: add xfs_mount sysfs kobject") Signed-off-by: Li Zetao Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_sysfs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 43585850f1546..513095e353a5b 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -33,10 +33,15 @@ xfs_sysfs_init( const char *name) { struct kobject *parent; + int err; parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + if (err) + kobject_put(&kobj->kobject); + + return err; } static inline void From cf00b33058b196b4db928419dde68993b15a975b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:43 +0100 Subject: [PATCH 013/239] cxl/mbox: Add a check on input payload size A bug in the LSA code resulted in transfers slightly larger than the mailbox size. Let us make it easier to catch similar issues in future by adding a low level check. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220815154044.24733-2-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 16176b9278b4e..0c90f13870a43 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -174,7 +174,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, }; int rc; - if (out_size > cxlds->payload_size) + if (in_size > cxlds->payload_size || out_size > cxlds->payload_size) return -E2BIG; rc = cxlds->mbox_send(cxlds, &mbox_cmd); From 2816e24b0510e0c185c0c46acff1ce7aa4c4443f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 18 Aug 2022 17:42:10 +0100 Subject: [PATCH 014/239] cxl/region: Fix null pointer dereference due to pass through decoder commit Not all decoders have a commit callback. The CXL specification allows a host bridge with a single root port to have no explicit HDM decoders. Currently the region driver assumes there are none. As such the CXL core creates a special pass through decoder instance without a commit callback. Prior to this patch, the ->commit() callback was called unconditionally. Thus a configuration with 1 Host Bridge, 1 Root Port, 1 switch with multiple downstream ports below which there are multiple CXL type 3 devices results in a situation where committing the region causes a null pointer dereference. Reported-by: Bobo WL Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") Signed-off-by: Jonathan Cameron Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/20220818164210.2084-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 4011480169784..c49d9a5f1091c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -174,7 +174,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->commit(cxld); + if (cxld->commit) + rc = cxld->commit(cxld); if (rc) break; } From f010c75c05299ecd65adfd31a7841eea3476ce1f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:44 +0100 Subject: [PATCH 015/239] cxl/pmem: Fix failure to account for 8 byte header for writes to the device LSA. Writes to the device must include an offset and size as defined in CXL 2.0 8.2.9.5.2.4 Set LSA (Opcode 4103h) Fixes tag is non obvious as this code has been through several reworks and variable names + wasn't in use until the addition of the region code. Due to a bug in QEMU CXL emulation this overrun resulted in QEMU crashing. Reported-by: Bobo WL Signed-off-by: Jonathan Cameron Fixes: 60b8f17215de ("cxl/pmem: Translate NVDIMM label commands to CXL label commands") Link: https://lore.kernel.org/r/20220815154044.24733-3-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 7dc0a2fa1a6b6..115a7b79f3439 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -107,7 +107,7 @@ static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, *cmd = (struct nd_cmd_get_config_size) { .config_size = cxlds->lsa_size, - .max_xfer = cxlds->payload_size, + .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; From 24f0692bfd41fd207d99c993a5785c3426762046 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 20 Oct 2022 16:54:55 -0700 Subject: [PATCH 016/239] ACPI: NUMA: Add CXL CFMWS 'nodes' to the possible nodes set The ACPI CEDT.CFMWS indicates a range of possible address where new CXL regions can appear. Each range is associated with a QTG id (QoS Throttling Group id). For each range + QTG pair that is not covered by a proximity domain in the SRAT, Linux creates a new NUMA node. However, the commit that added the new ranges missed updating the node_possible mask which causes memory_group_register() to fail. Add the new nodes to the nodes_possible mask. Cc: Fixes: fd49f99c1809 ("ACPI: NUMA: Add a node and memblk for each CFMWS not in SRAT") Cc: Alison Schofield Cc: Rafael J. Wysocki Reported-by: Vishal Verma Tested-by: Vishal Verma Acked-by: Rafael J. Wysocki Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166631003537.1167078.9373680312035292395.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/srat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 3b818ab186be8..1f4fc5f8a819d 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -327,6 +327,7 @@ static int __init acpi_parse_cfmws(union acpi_subtable_headers *header, pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n", node, start, end); } + node_set(node, numa_nodes_parsed); /* Set the next available fake_pxm value */ (*fake_pxm)++; From f23f1a1e8437e38014fe34a2f12e37e861e5bcc7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Sep 2022 03:10:08 +0200 Subject: [PATCH 017/239] arm64: dts: imx8mm: Enable CPLD_Dn pull down resistor on MX8Menlo Enable CPLD_Dn pull down resistor instead of pull up to avoid intefering with CPLD power off functionality. Fixes: 510c527b4ff57 ("arm64: dts: imx8mm: Add i.MX8M Mini Toradex Verdin based Menlo board") Signed-off-by: Marek Vasut Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8mm-mx8menlo.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts index 32f6f2f50c10c..43e89859c0445 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts @@ -250,21 +250,21 @@ /* SODIMM 96 */ MX8MM_IOMUXC_SAI1_RXD2_GPIO4_IO4 0x1c4 /* CPLD_D[7] */ - MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x1c4 + MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x184 /* CPLD_D[6] */ - MX8MM_IOMUXC_SAI1_RXFS_GPIO4_IO0 0x1c4 + MX8MM_IOMUXC_SAI1_RXFS_GPIO4_IO0 0x184 /* CPLD_D[5] */ - MX8MM_IOMUXC_SAI1_TXC_GPIO4_IO11 0x1c4 + MX8MM_IOMUXC_SAI1_TXC_GPIO4_IO11 0x184 /* CPLD_D[4] */ - MX8MM_IOMUXC_SAI1_TXD0_GPIO4_IO12 0x1c4 + MX8MM_IOMUXC_SAI1_TXD0_GPIO4_IO12 0x184 /* CPLD_D[3] */ - MX8MM_IOMUXC_SAI1_TXD1_GPIO4_IO13 0x1c4 + MX8MM_IOMUXC_SAI1_TXD1_GPIO4_IO13 0x184 /* CPLD_D[2] */ - MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x1c4 + MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x184 /* CPLD_D[1] */ - MX8MM_IOMUXC_SAI1_TXD3_GPIO4_IO15 0x1c4 + MX8MM_IOMUXC_SAI1_TXD3_GPIO4_IO15 0x184 /* CPLD_D[0] */ - MX8MM_IOMUXC_SAI1_TXD4_GPIO4_IO16 0x1c4 + MX8MM_IOMUXC_SAI1_TXD4_GPIO4_IO16 0x184 /* KBD_intK */ MX8MM_IOMUXC_SAI2_MCLK_GPIO4_IO27 0x1c4 /* DISP_reset */ From f4cd18c5b2000df0c382f6530eeca9141ea41faf Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 22 Oct 2022 08:23:52 -0700 Subject: [PATCH 018/239] efi/tpm: Pass correct address to memblock_reserve memblock_reserve() expects a physical address, but the address being passed for the TPM final events log is what was returned from early_memremap(). This results in something like the following: [ 0.000000] memblock_reserve: [0xffffffffff2c0000-0xffffffffff2c00e4] efi_tpm_eventlog_init+0x324/0x370 Pass the address from efi like what is done for the TPM events log. Fixes: c46f3405692d ("tpm: Reserve the TPM final events table") Cc: Matthew Garrett Cc: Jarkko Sakkinen Cc: Bartosz Szczepanek Cc: Ard Biesheuvel Signed-off-by: Jerry Snitselaar Acked-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 8f665678e9e39..e8d69bd548f3f 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -97,7 +97,7 @@ int __init efi_tpm_eventlog_init(void) goto out_calc; } - memblock_reserve((unsigned long)final_tbl, + memblock_reserve(efi.tpm_final_log, tbl_size + sizeof(*final_tbl)); efi_tpm_final_log_size = tbl_size; From 161a438d730dade2ba2b1bf8785f0759aba4ca5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:08 +0200 Subject: [PATCH 019/239] efi: random: reduce seed size to 32 bytes We no longer need at least 64 bytes of random seed to permit the early crng init to complete. The RNG is now based on Blake2s, so reduce the EFI seed size to the Blake2s hash size, which is sufficient for our purposes. While at it, drop the READ_ONCE(), which was supposed to prevent size from being evaluated after seed was unmapped. However, this cannot actually happen, so READ_ONCE() is unnecessary here. Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Jason A. Donenfeld Acked-by: Ilias Apalodimas --- drivers/firmware/efi/efi.c | 2 +- include/linux/efi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3ecdc43a3f2bb..a46df5d1d0942 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -611,7 +611,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, seed = early_memremap(efi_rng_seed, sizeof(*seed)); if (seed != NULL) { - size = READ_ONCE(seed->size); + size = min(seed->size, EFI_RANDOM_SEED_SIZE); early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/include/linux/efi.h b/include/linux/efi.h index 80f3c1c7827dd..929d559ad41d2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1222,7 +1222,7 @@ efi_status_t efi_random_get_seed(void); arch_efi_call_virt_teardown(); \ }) -#define EFI_RANDOM_SEED_SIZE 64U +#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { u32 size; From 7d866e38c7e9ece8a096d0d098fa9d92b9d4f97e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:09 +0200 Subject: [PATCH 020/239] efi: random: Use 'ACPI reclaim' memory for random seed EFI runtime services data is guaranteed to be preserved by the OS, making it a suitable candidate for the EFI random seed table, which may be passed to kexec kernels as well (after refreshing the seed), and so we need to ensure that the memory is preserved without support from the OS itself. However, runtime services data is intended for allocations that are relevant to the implementations of the runtime services themselves, and so they are unmapped from the kernel linear map, and mapped into the EFI page tables that are active while runtime service invocations are in progress. None of this is needed for the RNG seed. So let's switch to EFI 'ACPI reclaim' memory: in spite of the name, there is nothing exclusively ACPI about it, it is simply a type of allocation that carries firmware provided data which may or may not be relevant to the OS, and it is left up to the OS to decide whether to reclaim it after having consumed its contents. Given that in Linux, we never reclaim these allocations, it is a good choice for the EFI RNG seed, as the allocation is guaranteed to survive kexec reboots. One additional reason for changing this now is to align it with the upcoming recommendation for EFI bootloader provided RNG seeds, which must not use EFI runtime services code/data allocations. Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Ilias Apalodimas --- drivers/firmware/efi/libstub/random.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 24aa375353724..33ab567695951 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -75,7 +75,12 @@ efi_status_t efi_random_get_seed(void) if (status != EFI_SUCCESS) return status; - status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + /* + * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the + * allocation will survive a kexec reboot (although we refresh the seed + * beforehand) + */ + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*seed) + EFI_RANDOM_SEED_SIZE, (void **)&seed); if (status != EFI_SUCCESS) From 9440c42941606af4c379afa3cf8624f0dc43a629 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 18 Oct 2022 14:27:08 +0200 Subject: [PATCH 021/239] x86/syscall: Include asm/ptrace.h in syscall_wrapper header With just the forward declaration of the 'struct pt_regs' in syscall_wrapper.h, the syscall stub functions: __[x64|ia32]_sys_*(struct pt_regs *regs) will have different definition of 'regs' argument in BTF data based on which object file they are defined in. If the syscall's object includes 'struct pt_regs' definition, the BTF argument data will point to a 'struct pt_regs' record, like: [226] STRUCT 'pt_regs' size=168 vlen=21 'r15' type_id=1 bits_offset=0 'r14' type_id=1 bits_offset=64 'r13' type_id=1 bits_offset=128 ... If not, it will point to a fwd declaration record: [15439] FWD 'pt_regs' fwd_kind=struct and make bpf tracing program hooking on those functions unable to access fields from 'struct pt_regs'. Include asm/ptrace.h directly in syscall_wrapper.h to make sure all syscalls see 'struct pt_regs' definition. This then results in BTF for '__*_sys_*(struct pt_regs *regs)' functions to point to the actual struct, not just the forward declaration. [ bp: No Fixes tag as this is not really a bug fix but "adjustment" so that BTF is happy. ] Reported-by: Akihiro HARAI Signed-off-by: Jiri Olsa Signed-off-by: Borislav Petkov Acked-by: Andrii Nakryiko Cc: # this is needed only for BTF so kernels >= 5.15 Link: https://lore.kernel.org/r/20221018122708.823792-1-jolsa@kernel.org --- arch/x86/include/asm/syscall_wrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 59358d1bf8800..fd2669b1cb2d9 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,7 +6,7 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H -struct pt_regs; +#include extern long __x64_sys_ni_syscall(const struct pt_regs *regs); extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); From a9003f74f5a2f487e101f3aa1dd5c3d3a78c6999 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 7 Oct 2022 15:10:01 +0200 Subject: [PATCH 022/239] clk: renesas: r8a779g0: Fix HSCIF parent clocks As serial communication requires a clean clock signal, the High Speed Serial Communication Interfaces with FIFO (HSCIF) is clocked by a clock that is not affected by Spread Spectrum or Fractional Multiplication. Hence change the parent clocks for the HSCIF modules from the S0D3_PER clock to the SASYNCPERD1 clock (which has the same clock rate), cfr. R-Car V4H Hardware User's Manual rev. 0.54. Fixes: 0ab55cf1834177a2 ("clk: renesas: cpg-mssr: Add support for R-Car V4H") Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/b7928abc8b9f53d5b06ec8624342f449de3d24ec.1665147497.git.geert+renesas@glider.be --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index 3e8c93facfa14..d5b325e3c5398 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -158,10 +158,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("avb0", 211, R8A779G0_CLK_S0D4_HSC), DEF_MOD("avb1", 212, R8A779G0_CLK_S0D4_HSC), DEF_MOD("avb2", 213, R8A779G0_CLK_S0D4_HSC), - DEF_MOD("hscif0", 514, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif1", 515, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif2", 516, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif3", 517, R8A779G0_CLK_S0D3_PER), + DEF_MOD("hscif0", 514, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif1", 515, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif2", 516, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif3", 517, R8A779G0_CLK_SASYNCPERD1), DEF_MOD("i2c0", 518, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c1", 519, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c2", 520, R8A779G0_CLK_S0D6_PER), From e07ee6fe21f47cfd72ae566395c67a80e7c66163 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Tue, 25 Oct 2022 12:16:27 -0700 Subject: [PATCH 023/239] xfs: increase rename inode reservation xfs_rename can update up to 5 inodes: src_dp, target_dp, src_ip, target_ip and wip. So we need to increase the inode reservation to match. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 4 ++-- fs/xfs/xfs_inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 2c4ad6e4bb149..5b2f27cbdb808 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize( /* * In renaming a files we can modify: - * the four inodes involved: 4 * inode size + * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets @@ -437,7 +437,7 @@ xfs_calc_rename_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 4) + + max((xfs_calc_inode_res(mp, 5) + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c000b74dd2035..aa303be11576f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2818,7 +2818,7 @@ xfs_rename( * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source - * directory, we can lock from 2 to 4 inodes. + * directory, we can lock from 2 to 5 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); From 5c1df62ca65936139a4b008561110d9fc285c58a Mon Sep 17 00:00:00 2001 From: Stefan Hansson Date: Mon, 17 Oct 2022 17:01:14 +0200 Subject: [PATCH 024/239] kbuild: use POSIX-compatible grep option --file is a GNU extension to grep which is not available in all implementations (such as BusyBox). Use the -f option instead which is eqvuialent according to the GNU grep manpage[1] and is present in POSIX[2]. [1] https://www.gnu.org/software/grep/manual/grep.html [2] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html Signed-off-by: Stefan Hansson Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d148a55bfd0f5..e90bb2b38607a 100644 --- a/Makefile +++ b/Makefile @@ -1218,7 +1218,7 @@ quiet_cmd_ar_vmlinux.a = AR $@ cmd_ar_vmlinux.a = \ rm -f $@; \ $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ - $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F --file=$(srctree)/scripts/head-object-list.txt) + $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) targets += vmlinux.a vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt autoksyms_recursive FORCE From 114ff6fe6cfbe81659f9e517d0b25f53db5dfc5d Mon Sep 17 00:00:00 2001 From: Dan Li Date: Thu, 20 Oct 2022 03:38:23 -0700 Subject: [PATCH 025/239] Documentation: kbuild: Add description of git for reproducible builds The status of git will affect the final compilation result, add it to the documentation of reproducible builds. Signed-off-by: Dan Li Signed-off-by: Masahiro Yamada --- Documentation/kbuild/reproducible-builds.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index 071f0151a7a4e..f2dcc39044e66 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -119,6 +119,16 @@ To avoid this, you can make the vDSO different for different kernel versions by including an arbitrary string of "salt" in it. This is specified by the Kconfig symbol ``CONFIG_BUILD_SALT``. +Git +--- + +Uncommitted changes or different commit ids in git can also lead +to different compilation results. For example, after executing +``git reset HEAD^``, even if the code is the same, the +``include/config/kernel.release`` generated during compilation +will be different, which will eventually lead to binary differences. +See ``scripts/setlocalversion`` for details. + .. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp .. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host .. _KCFLAGS: kbuild.html#kcflags From 3b1e0dd2dc8a280b1e89c0df6d38cd28768575a5 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Tue, 25 Oct 2022 13:17:44 -0700 Subject: [PATCH 026/239] kbuild: fix typo in modpost Commit f73edc8951b2 ("kbuild: unify two modpost invocations") introduced a typo (moudle.symvers-if-present) which results in the kernel's Module.symvers to not be included as a prerequisite for $(KBUILD_EXTMOD)/Module.symvers. Fix the typo to restore the intended functionality. Fixes: f73edc8951b2 ("kbuild: unify two modpost invocations") Signed-off-by: Will McVicker Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 8489a3402eb8c..e41dee64d429c 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -122,7 +122,7 @@ quiet_cmd_modpost = MODPOST $@ sed 's/ko$$/o/' $(or $(modorder-if-needed), /dev/null) | $(MODPOST) $(modpost-args) -T - $(vmlinux.o-if-present) targets += $(output-symdump) -$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(moudle.symvers-if-present) $(MODPOST) FORCE +$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE $(call if_changed,modpost) __modpost: $(output-symdump) From 08a32902a56e1670850fe5d518d8203e9ce354b8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:56 +0200 Subject: [PATCH 027/239] clk: Remove WARN_ON NULL parent in clk_core_init_rate_req() If a clock has CLK_SET_RATE_PARENT, but core->parent is NULL (most likely because it's orphan), callers of clk_core_init_rate_req() will blindly call this function leading to a very verbose warning. Since it's a fairly common situation, let's just remove the WARN_ON but keep the check that prevents us from dereferencing the pointer. Interestingly, it fixes a regression on the Mediatek MT8195 where the GPU would stall during a clk_set_rate for its main clock. We couldn't come up with a proper explanation since the condition is essentially the same. It was then assumed that it could be timing related since printing the warning stacktrace takes a while, but we couldn't replicate the failure by using fairly large (10ms) mdelays. Fixes: 262ca38f4b6e ("clk: Stop forwarding clk_rate_requests to the parent") Reported-by: AngeloGioacchino Del Regno Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-1-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c3c3f8c072588..37d623c7b73b7 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1459,7 +1459,7 @@ static void clk_core_init_rate_req(struct clk_core * const core, { struct clk_core *parent; - if (WARN_ON(!core || !req)) + if (!core || WARN_ON(!req)) return; memset(req, 0, sizeof(*req)); From 2079d029387adfc0cc123f01a6fcf9eb6540ee4d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:57 +0200 Subject: [PATCH 028/239] clk: Initialize the clk_rate_request even if clk_core is NULL Since commit c35e84b09776 ("clk: Introduce clk_hw_init_rate_request()"), users that used to initialize their clk_rate_request by initializing their local structure now rely on clk_hw_init_rate_request(). This function is backed by clk_core_init_rate_req(), which will skip the initialization if either the pointer to struct clk_core or to struct clk_rate_request are NULL. However, the core->parent pointer might be NULL because the clock is orphan, and we will thus end up with our local struct clk_rate_request left untouched. And since clk_hw_init_rate_request() doesn't return an error, we will then call a determine_rate variant with that unitialized structure. In order to avoid this, let's clear our clk_rate_request if the pointer to it is valid but the pointer to struct clk_core isn't. Fixes: c35e84b09776 ("clk: Introduce clk_hw_init_rate_request()") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-2-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 37d623c7b73b7..eb2f9be9b9aa5 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1459,11 +1459,14 @@ static void clk_core_init_rate_req(struct clk_core * const core, { struct clk_core *parent; - if (!core || WARN_ON(!req)) + if (WARN_ON(!req)) return; memset(req, 0, sizeof(*req)); + if (!core) + return; + req->rate = rate; clk_core_get_boundaries(core, &req->min_rate, &req->max_rate); From 774560cf28fe115f106b6176c54ec641491136ac Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:58 +0200 Subject: [PATCH 029/239] clk: Initialize max_rate in struct clk_rate_request Since commit b46fd8dbe8ad ("clk: Zero the clk_rate_request structure"), the clk_core_init_rate_req() function clears the struct clk_rate_request passed as argument. However, the default value for max_rate isn't 0 but ULONG_MAX, and we end up creating a clk_rate_request instance where the maximum rate is 0. Let's initialize max_rate to ULONG_MAX properly. Fixes: b46fd8dbe8ad ("clk: Zero the clk_rate_request structure") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-3-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index eb2f9be9b9aa5..57b83665e5c3a 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1463,6 +1463,7 @@ static void clk_core_init_rate_req(struct clk_core * const core, return; memset(req, 0, sizeof(*req)); + req->max_rate = ULONG_MAX; if (!core) return; From ffa20aa581cf5377fc397b0d0ff9d67ea823629b Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 19 Oct 2022 11:35:35 +0530 Subject: [PATCH 030/239] clk: qcom: Update the force mem core bit for GPU clocks There are few GPU clocks which are powering up the memories and thus enable the FORCE_MEM_PERIPH always for these clocks to force the periph_on signal to remain active during halt state of the clock. Fixes: a3cc092196ef ("clk: qcom: Add Global Clock controller (GCC) driver for SC7280") Fixes: 3e0f01d6c7e7 ("clk: qcom: Add graphics clock controller driver for SC7280") Signed-off-by: Taniya Das Signed-off-by: Satya Priya Link: https://lore.kernel.org/r/1666159535-6447-1-git-send-email-quic_c_skakit@quicinc.com Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7280.c | 1 + drivers/clk/qcom/gpucc-sc7280.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-sc7280.c b/drivers/clk/qcom/gcc-sc7280.c index 8afb7575e712f..46d41ebce2b08 100644 --- a/drivers/clk/qcom/gcc-sc7280.c +++ b/drivers/clk/qcom/gcc-sc7280.c @@ -3467,6 +3467,7 @@ static int gcc_sc7280_probe(struct platform_device *pdev) regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x28014, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x7100C, BIT(13), BIT(13)); ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, ARRAY_SIZE(gcc_dfs_clocks)); diff --git a/drivers/clk/qcom/gpucc-sc7280.c b/drivers/clk/qcom/gpucc-sc7280.c index 9a832f2bcf491..1490cd45a654a 100644 --- a/drivers/clk/qcom/gpucc-sc7280.c +++ b/drivers/clk/qcom/gpucc-sc7280.c @@ -463,6 +463,7 @@ static int gpu_cc_sc7280_probe(struct platform_device *pdev) */ regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x1098, BIT(13), BIT(13)); return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap); } From 9a8c5b0d061554fedd7dbe894e63aa34d0bac7c4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 27 Oct 2022 16:04:36 -0400 Subject: [PATCH 031/239] ext4: update the backup superblock's at the end of the online resize When expanding a file system using online resize, various fields in the superblock (e.g., s_blocks_count, s_inodes_count, etc.) change. To update the backup superblocks, the online resize uses the function update_backups() in fs/ext4/resize.c. This function was not updating the checksum field in the backup superblocks. This wasn't a big deal previously, because e2fsck didn't care about the checksum field in the backup superblock. (And indeed, update_backups() goes all the way back to the ext3 days, well before we had support for metadata checksums.) However, there is an alternate, more general way of updating superblock fields, ext4_update_primary_sb() in fs/ext4/ioctl.c. This function does check the checksum of the backup superblock, and if it doesn't match will mark the file system as corrupted. That was clearly not the intent, so avoid to aborting the resize when a bad superblock is found. In addition, teach update_backups() to properly update the checksum in the backup superblocks. We will eventually want to unify updapte_backups() with the infrasture in ext4_update_primary_sb(), but that's for another day. Note: The problem has been around for a while; it just didn't really matter until ext4_update_primary_sb() was added by commit bbc605cdb1e1 ("ext4: implement support for get/set fs label"). And it became trivially easy to reproduce after commit 827891a38acc ("ext4: update the s_overhead_clusters in the backup sb's when resizing") in v6.0. Cc: stable@kernel.org # 5.17+ Fixes: bbc605cdb1e1 ("ext4: implement support for get/set fs label") Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 +-- fs/ext4/resize.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4d49c5cfb690f..790d5ffe85596 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct super_block *sb, if (ext4_has_metadata_csum(sb) && es->s_checksum != ext4_superblock_csum(sb, es)) { ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " - "superblock %llu\n", sb_block); + "superblock %llu", sb_block); unlock_buffer(bh); - err = -EFSBADCRC; goto out_bh; } func(es, arg); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6dfe9ccae0c50..46b87ffeb3045 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1158,6 +1158,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; + struct ext4_super_block *es; /* Out of journal space, and can't get more - abort - so sad */ err = ext4_resize_ensure_credits_batch(handle, 1); @@ -1186,6 +1187,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); + es = (struct ext4_super_block *) bh->b_data; + es->s_block_group_nr = cpu_to_le16(group); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); From fb3041d61f6867158088c627c2790f94e208d1ea Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 28 Oct 2022 01:28:39 +0900 Subject: [PATCH 032/239] kbuild: fix SIGPIPE error message for AR=gcc-ar and AR=llvm-ar Jiri Slaby reported that building the kernel with AR=gcc-ar shows: /usr/bin/ar terminated with signal 13 [Broken pipe] Nathan Chancellor reported the latest AR=llvm-ar shows: error: write on a pipe with no reader The latter occurs since LLVM commit 51b557adc131 ("Add an error message to the default SIGPIPE handler"). The resulting vmlinux is correct, but it is better to silence it. 'head -n1' exits after reading the first line, so the pipe is closed. Use 'sed -n 1p' to eat the stream till the end. Fixes: 321648455061 ("kbuild: use obj-y instead extra-y for objects placed at the head") Link: https://github.com/ClangBuiltLinux/linux/issues/1651 Reported-by: Jiri Slaby Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Tested-by: Nathan Chancellor --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e90bb2b38607a..e9e7eff906a5e 100644 --- a/Makefile +++ b/Makefile @@ -1218,7 +1218,7 @@ quiet_cmd_ar_vmlinux.a = AR $@ cmd_ar_vmlinux.a = \ rm -f $@; \ $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ - $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) targets += vmlinux.a vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt autoksyms_recursive FORCE From 633efc8b3dc96f56f5a57f2a49764853a2fa3f50 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Wed, 26 Oct 2022 10:03:21 +0800 Subject: [PATCH 033/239] net: dsa: Fix possible memory leaks in dsa_loop_init() kmemleak reported memory leaks in dsa_loop_init(): kmemleak: 12 new suspected memory leaks unreferenced object 0xffff8880138ce000 (size 2048): comm "modprobe", pid 390, jiffies 4295040478 (age 238.976s) backtrace: [<000000006a94f1d5>] kmalloc_trace+0x26/0x60 [<00000000a9c44622>] phy_device_create+0x5d/0x970 [<00000000d0ee2afc>] get_phy_device+0xf3/0x2b0 [<00000000dca0c71f>] __fixed_phy_register.part.0+0x92/0x4e0 [<000000008a834798>] fixed_phy_register+0x84/0xb0 [<0000000055223fcb>] dsa_loop_init+0xa9/0x116 [dsa_loop] ... There are two reasons for memleak in dsa_loop_init(). First, fixed_phy_register() create and register phy_device: fixed_phy_register() get_phy_device() phy_device_create() # freed by phy_device_free() phy_device_register() # freed by phy_device_remove() But fixed_phy_unregister() only calls phy_device_remove(). So the memory allocated in phy_device_create() is leaked. Second, when mdio_driver_register() fail in dsa_loop_init(), it just returns and there is no cleanup for phydevs. Fix the problems by catching the error of mdio_driver_register() in dsa_loop_init(), then calling both fixed_phy_unregister() and phy_device_free() to release phydevs. Also add a function for phydevs cleanup to avoid duplacate. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Chen Zhongjin Signed-off-by: David S. Miller --- drivers/net/dsa/dsa_loop.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index b9107fe400231..5b139f2206b6e 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -376,6 +376,17 @@ static struct mdio_driver dsa_loop_drv = { #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) +static void dsa_loop_phydevs_unregister(void) +{ + unsigned int i; + + for (i = 0; i < NUM_FIXED_PHYS; i++) + if (!IS_ERR(phydevs[i])) { + fixed_phy_unregister(phydevs[i]); + phy_device_free(phydevs[i]); + } +} + static int __init dsa_loop_init(void) { struct fixed_phy_status status = { @@ -383,23 +394,23 @@ static int __init dsa_loop_init(void) .speed = SPEED_100, .duplex = DUPLEX_FULL, }; - unsigned int i; + unsigned int i, ret; for (i = 0; i < NUM_FIXED_PHYS; i++) phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); - return mdio_driver_register(&dsa_loop_drv); + ret = mdio_driver_register(&dsa_loop_drv); + if (ret) + dsa_loop_phydevs_unregister(); + + return ret; } module_init(dsa_loop_init); static void __exit dsa_loop_exit(void) { - unsigned int i; - mdio_driver_unregister(&dsa_loop_drv); - for (i = 0; i < NUM_FIXED_PHYS; i++) - if (!IS_ERR(phydevs[i])) - fixed_phy_unregister(phydevs[i]); + dsa_loop_phydevs_unregister(); } module_exit(dsa_loop_exit); From 8fdf3f6aba7cfa0c0e2bf66ecca7bb5783acd0d6 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Wed, 26 Oct 2022 20:45:24 +0530 Subject: [PATCH 034/239] net: emaclite: update reset_lock member documentation Instead of generic description, mention what reset_lock actually protects i.e. lock to serialize xmit and tx_timeout execution. Signed-off-by: Radhey Shyam Pandey Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 05848ff15fb51..a3967f8de417d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -108,7 +108,7 @@ * @next_tx_buf_to_use: next Tx buffer to write to * @next_rx_buf_to_use: next Rx buffer to read from * @base_addr: base address of the Emaclite device - * @reset_lock: lock used for synchronization + * @reset_lock: lock to serialize xmit and tx_timeout execution * @deferred_skb: holds an skb (for transmission at a later time) when the * Tx buffer is not free * @phy_dev: pointer to the PHY device From 4a6f278d4827b59ba26ceae0ff4529ee826aa258 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2022 14:25:20 +0200 Subject: [PATCH 035/239] fuse: add file_modified() to fallocate Add missing file_modified() call to fuse_file_fallocate(). Without this fallocate on fuse failed to clear privileges. Fixes: 05ba1f082300 ("fuse: add FALLOCATE operation") Cc: Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1a3afd469e3a9..71bfb663aac58 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3001,6 +3001,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; } + err = file_modified(file); + if (err) + goto out; + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From 2124becad797245d49252d2d733aee0322233d7e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Oct 2022 07:11:36 -0500 Subject: [PATCH 036/239] ACPI: x86: Add another system to quirk list for forcing StorageD3Enable commit 018d6711c26e4 ("ACPI: x86: Add a quirk for Dell Inspiron 14 2-in-1 for StorageD3Enable") introduced a quirk to allow a system with ambiguous use of _ADR 0 to force StorageD3Enable. Julius Brockmann reports that Inspiron 16 5625 suffers that same symptoms. Add this other system to the list as well. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216440 Reported-and-tested-by: Julius Brockmann Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index f8a2cbdc0ce2b..d7d3f1669d4c0 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -219,6 +219,12 @@ static const struct dmi_system_id force_storage_d3_dmi[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14 7425 2-in-1"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 16 5625"), + } + }, {} }; From f11a74b45d330ad1ab986852b099747161052526 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Oct 2022 15:52:31 +0200 Subject: [PATCH 037/239] efi: efivars: Fix variable writes with unsupported query_variable_store() Commit 8a254d90a775 ("efi: efivars: Fix variable writes without query_variable_store()") addressed an issue that was introduced during the EFI variable store refactor, where alternative implementations of the efivars layer that lacked query_variable_store() would no longer work. Unfortunately, there is another case to consider here, which was missed: if the efivars layer is backed by the EFI runtime services as usual, but the EFI implementation predates the introduction of QueryVariableInfo(), we will return EFI_UNSUPPORTED, and this is no longer being dealt with correctly. So let's fix this, and while at it, clean up the code a bit, by merging the check_var_size() routines as well as their callers. Cc: # v6.0 Fixes: bbc6d2c6ef22 ("efi: vars: Switch to new wrapper layer") Signed-off-by: Ard Biesheuvel Tested-by: Aditya Garg --- drivers/firmware/efi/vars.c | 68 +++++++++++-------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 433b615871395..0ba9f18312f5b 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -21,29 +21,22 @@ static struct efivars *__efivars; static DEFINE_SEMAPHORE(efivars_lock); -static efi_status_t check_var_size(u32 attributes, unsigned long size) -{ - const struct efivar_operations *fops; - - fops = __efivars->ops; - - if (!fops->query_variable_store) - return (size <= SZ_64K) ? EFI_SUCCESS : EFI_OUT_OF_RESOURCES; - - return fops->query_variable_store(attributes, size, false); -} - -static -efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size) +static efi_status_t check_var_size(bool nonblocking, u32 attributes, + unsigned long size) { const struct efivar_operations *fops; + efi_status_t status; fops = __efivars->ops; if (!fops->query_variable_store) + status = EFI_UNSUPPORTED; + else + status = fops->query_variable_store(attributes, size, + nonblocking); + if (status == EFI_UNSUPPORTED) return (size <= SZ_64K) ? EFI_SUCCESS : EFI_OUT_OF_RESOURCES; - - return fops->query_variable_store(attributes, size, true); + return status; } /** @@ -195,26 +188,6 @@ efi_status_t efivar_get_next_variable(unsigned long *name_size, } EXPORT_SYMBOL_NS_GPL(efivar_get_next_variable, EFIVAR); -/* - * efivar_set_variable_blocking() - local helper function for set_variable - * - * Must be called with efivars_lock held. - */ -static efi_status_t -efivar_set_variable_blocking(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, void *data) -{ - efi_status_t status; - - if (data_size > 0) { - status = check_var_size(attr, data_size + - ucs2_strsize(name, 1024)); - if (status != EFI_SUCCESS) - return status; - } - return __efivars->ops->set_variable(name, vendor, attr, data_size, data); -} - /* * efivar_set_variable_locked() - set a variable identified by name/vendor * @@ -228,23 +201,21 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, efi_set_variable_t *setvar; efi_status_t status; - if (!nonblocking) - return efivar_set_variable_blocking(name, vendor, attr, - data_size, data); + if (data_size > 0) { + status = check_var_size(nonblocking, attr, + data_size + ucs2_strsize(name, 1024)); + if (status != EFI_SUCCESS) + return status; + } /* * If no _nonblocking variant exists, the ordinary one * is assumed to be non-blocking. */ - setvar = __efivars->ops->set_variable_nonblocking ?: - __efivars->ops->set_variable; + setvar = __efivars->ops->set_variable_nonblocking; + if (!setvar || !nonblocking) + setvar = __efivars->ops->set_variable; - if (data_size > 0) { - status = check_var_size_nonblocking(attr, data_size + - ucs2_strsize(name, 1024)); - if (status != EFI_SUCCESS) - return status; - } return setvar(name, vendor, attr, data_size, data); } EXPORT_SYMBOL_NS_GPL(efivar_set_variable_locked, EFIVAR); @@ -264,7 +235,8 @@ efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, if (efivar_lock()) return EFI_ABORTED; - status = efivar_set_variable_blocking(name, vendor, attr, data_size, data); + status = efivar_set_variable_locked(name, vendor, attr, data_size, + data, false); efivar_unlock(); return status; } From 6f7630b1b5bc672b54c1285ee6aba752b446672c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2022 15:32:07 -0700 Subject: [PATCH 038/239] fortify: Capture __bos() results in const temp vars In two recent run-time memcpy() bound checking bug reports (NFS[1] and JFS[2]), the _detection_ was working correctly (in the sense that the requested copy size was larger than the destination field size), but the _warning text_ was showing the destination field size as SIZE_MAX ("unknown size"). This should be impossible, since the detection function will explicitly give up if the destination field size is unknown. For example, the JFS warning was: memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 18446744073709551615) Other cases of this warning (e.g.[3]) have reported correctly, and the reproducer only happens under GCC (at least 10.2 and 12.1), so this currently appears to be a GCC bug. Explicitly capturing the __builtin_object_size() results in const temporary variables fixes the report. For example, the JFS reproducer now correctly reports the field size (128): memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 128) Examination of the .text delta (which is otherwise identical), shows the literal value used in the report changing: - mov $0xffffffffffffffff,%rcx + mov $0x80,%ecx [1] https://lore.kernel.org/lkml/Y0zEzZwhOxTDcBTB@codemonkey.org.uk/ [2] https://syzkaller.appspot.com/bug?id=23d613df5259b977dac1696bec77f61a85890e3d [3] https://lore.kernel.org/all/202210110948.26b43120-yujie.liu@intel.com/ Cc: "Dr. David Alan Gilbert" Cc: llvm@lists.linux.dev Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 4029fe368a4f6..0f00a551939a4 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -441,13 +441,18 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ - size_t __fortify_size = (size_t)(size); \ - WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op), \ + const size_t __fortify_size = (size_t)(size); \ + const size_t __p_size = (p_size); \ + const size_t __q_size = (q_size); \ + const size_t __p_size_field = (p_size_field); \ + const size_t __q_size_field = (q_size_field); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ + __q_size, __p_size_field, \ + __q_size_field, #op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ - p_size_field); \ + __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) From 7354c9024f2835f6122ed9612e21ab379df050f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Oct 2022 14:21:07 -0700 Subject: [PATCH 039/239] netlink: hide validation union fields from kdoc Mark the validation fields as private, users shouldn't set them directly and they are too complicated to explain in a more succinct way (there's already a long explanation in the comment above). The strict_start_type field is set directly and has a dedicated comment so move that above the "private" section. Link: https://lore.kernel.org/r/20221027212107.2639255-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 4418b1981e318..7db13b3261fc2 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -317,19 +317,10 @@ struct nla_policy { u8 validation_type; u16 len; union { - const u32 bitfield32_valid; - const u32 mask; - const char *reject_message; - const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; - struct { - s16 min, max; - u8 network_byte_order:1; - }; - int (*validate)(const struct nlattr *attr, - struct netlink_ext_ack *extack); - /* This entry is special, and used for the attribute at index 0 + /** + * @strict_start_type: first attribute to validate strictly + * + * This entry is special, and used for the attribute at index 0 * only, and specifies special data about the policy, namely it * specifies the "boundary type" where strict length validation * starts for any attribute types >= this value, also, strict @@ -348,6 +339,20 @@ struct nla_policy { * was added to enforce strict validation from thereon. */ u16 strict_start_type; + + /* private: use NLA_POLICY_*() to set */ + const u32 bitfield32_valid; + const u32 mask; + const char *reject_message; + const struct nla_policy *nested_policy; + struct netlink_range_validation *range; + struct netlink_range_validation_signed *range_signed; + struct { + s16 min, max; + u8 network_byte_order:1; + }; + int (*validate)(const struct nlattr *attr, + struct netlink_ext_ack *extack); }; }; From e4ba4554209f626c52e2e57f26cba49a62663c8b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Oct 2022 20:25:01 -0700 Subject: [PATCH 040/239] net: openvswitch: add missing .resv_start_op I missed one of the families in OvS when annotating .resv_start_op. This triggers the warning added in commit ce48ebdd5651 ("genetlink: limit the use of validation workarounds to old ops"). Reported-by: syzbot+40eb8c0447c0e47a7e9b@syzkaller.appspotmail.com Fixes: 9c5d03d36251 ("genetlink: start to validate reserved header bytes") Link: https://lore.kernel.org/r/20221028032501.2724270-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 155263e735124..8b84869eb2ac7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2544,6 +2544,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_vport_genl_ops, .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), + .resv_start_op = OVS_VPORT_CMD_SET + 1, .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, From 1208b93dd901bafe2526fa9db005bbc30e7ae83a Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Thu, 27 Oct 2022 21:21:59 -0700 Subject: [PATCH 041/239] enic: MAINTAINERS: Update enic maintainers Update enic maintainers. Signed-off-by: Govindarajulu Varadarajan Link: https://lore.kernel.org/r/20221028042159.735670-1-govind.varadar@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 10c1344b44730..9e437612dd816 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5038,7 +5038,7 @@ F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti -M: Govindarajulu Varadarajan <_govind@gmx.com> +M: Satish Kharat S: Supported F: drivers/net/ethernet/cisco/enic/ From 8f279fb00bb29def9ac79e28c5d6d8e07d21f3fb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:56 +0100 Subject: [PATCH 042/239] udp: advertise ipv6 udp support for msghdr::ubuf_info Mark udp ipv6 as supporting msghdr::ubuf_info. In the original commit SOCK_SUPPORT_ZC was supposed to be set by a udp_init_sock() call from udp6_init_sock(), but d38afeec26ed4 ("tcp/udp: Call inet6_destroy_sock() in IPv6 ...") removed it and so ipv6 udp misses the flag. Cc: # 6.0 Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 129ec5a9b0eb7..bc65e5b7195b3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -66,6 +66,7 @@ int udpv6_init_sock(struct sock *sk) { skb_queue_head_init(&udp_sk(sk)->reader_queue); sk->sk_destruct = udpv6_destruct_sock; + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } From fee9ac06647e59a69fb7aec58f25267c134264b4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:57 +0100 Subject: [PATCH 043/239] net: remove SOCK_SUPPORT_ZC from sockmap sockmap replaces ->sk_prot with its own callbacks, we should remove SOCK_SUPPORT_ZC as the new proto doesn't support msghdr::ubuf_info. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++++ net/ipv4/tcp_bpf.c | 4 ++-- net/ipv4/udp_bpf.c | 4 ++-- net/unix/unix_bpf.c | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 22f8bab583ddd..5db02546941cf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1889,6 +1889,13 @@ void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); void sk_send_sigurg(struct sock *sk); +static inline void sock_replace_proto(struct sock *sk, struct proto *proto) +{ + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + WRITE_ONCE(sk->sk_prot, proto); +} + struct sockcm_cookie { u64 transmit_time; u32 mark; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index a1626afe87a10..c501c329b1dbe 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -607,7 +607,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } else { sk->sk_write_space = psock->saved_write_space; /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); } return 0; } @@ -620,7 +620,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); + sock_replace_proto(sk, &tcp_bpf_prots[family][config]); return 0; } EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index ff15918b7bdc7..e5dc91d0e0793 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -141,14 +141,14 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } if (sk->sk_family == AF_INET6) udp_bpf_check_v6_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]); + sock_replace_proto(sk, &udp_bpf_prots[family]); return 0; } EXPORT_SYMBOL_GPL(udp_bpf_update_proto); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7cf14c6b17254..e9bf155139612 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -145,12 +145,12 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_dgram_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot); + sock_replace_proto(sk, &unix_dgram_bpf_prot); return 0; } @@ -158,12 +158,12 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_stream_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot); + sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; } From e276d62dcfdee6582486e8b8344dd869518e14be Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:58 +0100 Subject: [PATCH 044/239] net/ulp: remove SOCK_SUPPORT_ZC from tls sockets Remove SOCK_SUPPORT_ZC when we're setting ulp as it might not support msghdr::ubuf_info, e.g. like TLS replacing ->sk_prot with a new set of handlers. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ulp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 7c27aa629af19..9ae50b1bd8444 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,6 +136,9 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = ulp_ops->init(sk); if (err) goto out_err; From 71b7786ea478f3c4611deff4d2b9676b0c17c56b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 27 Oct 2022 00:25:59 +0100 Subject: [PATCH 045/239] net: also flag accepted sockets supporting msghdr originated zerocopy Without this only the client initiated tcp sockets have SOCK_SUPPORT_ZC. The listening socket on the server also has it, but the accepted connections didn't, which meant IORING_OP_SEND[MSG]_ZC will always fails with -EOPNOTSUPP. Fixes: e993ffe3da4b ("net: flag sockets supporting msghdr originated zerocopy") Cc: # 6.0 CC: Jens Axboe Link: https://lore.kernel.org/io-uring/20221024141503.22b4e251@kernel.org/T/#m38aa19b0b825758fb97860a38ad13122051f9dda Signed-off-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3dd02396517df..4728087c42a5c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -754,6 +754,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) + set_bit(SOCK_SUPPORT_ZC, &newsock->flags); sock_graft(sk2, newsock); newsock->state = SS_CONNECTED; From 21ce2c121fa07b00b0906bd781590ea362e82ea2 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Thu, 27 Oct 2022 12:56:55 +0300 Subject: [PATCH 046/239] net: ethernet: adi: adin1110: Fix notifiers ADIN1110 was registering netdev_notifiers on each device probe. This leads to warnings/probe failures because of double registration of the same notifier when to adin1110/2111 devices are connected to the same system. Move the registration of netdev_notifiers in module init call, in this way multiple driver instances can use the same notifiers. Fixes: bc93e19d088b ("net: ethernet: adi: Add ADIN1110 support") Signed-off-by: Alexandru Tachici Link: https://lore.kernel.org/r/20221027095655.89890-2-alexandru.tachici@analog.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/adi/adin1110.c | 38 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 1744d623999d0..606c976108085 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -1512,16 +1512,15 @@ static struct notifier_block adin1110_switchdev_notifier = { .notifier_call = adin1110_switchdev_event, }; -static void adin1110_unregister_notifiers(void *data) +static void adin1110_unregister_notifiers(void) { unregister_switchdev_blocking_notifier(&adin1110_switchdev_blocking_notifier); unregister_switchdev_notifier(&adin1110_switchdev_notifier); unregister_netdevice_notifier(&adin1110_netdevice_nb); } -static int adin1110_setup_notifiers(struct adin1110_priv *priv) +static int adin1110_setup_notifiers(void) { - struct device *dev = &priv->spidev->dev; int ret; ret = register_netdevice_notifier(&adin1110_netdevice_nb); @@ -1536,13 +1535,14 @@ static int adin1110_setup_notifiers(struct adin1110_priv *priv) if (ret < 0) goto err_sdev; - return devm_add_action_or_reset(dev, adin1110_unregister_notifiers, NULL); + return 0; err_sdev: unregister_switchdev_notifier(&adin1110_switchdev_notifier); err_netdev: unregister_netdevice_notifier(&adin1110_netdevice_nb); + return ret; } @@ -1613,10 +1613,6 @@ static int adin1110_probe_netdevs(struct adin1110_priv *priv) if (ret < 0) return ret; - ret = adin1110_setup_notifiers(priv); - if (ret < 0) - return ret; - for (i = 0; i < priv->cfg->ports_nr; i++) { ret = devm_register_netdev(dev, priv->ports[i]->netdev); if (ret < 0) { @@ -1693,7 +1689,31 @@ static struct spi_driver adin1110_driver = { .probe = adin1110_probe, .id_table = adin1110_spi_id, }; -module_spi_driver(adin1110_driver); + +static int __init adin1110_driver_init(void) +{ + int ret; + + ret = adin1110_setup_notifiers(); + if (ret < 0) + return ret; + + ret = spi_register_driver(&adin1110_driver); + if (ret < 0) { + adin1110_unregister_notifiers(); + return ret; + } + + return 0; +} + +static void __exit adin1110_exit(void) +{ + adin1110_unregister_notifiers(); + spi_unregister_driver(&adin1110_driver); +} +module_init(adin1110_driver_init); +module_exit(adin1110_exit); MODULE_DESCRIPTION("ADIN1110 Network driver"); MODULE_AUTHOR("Alexandru Tachici "); From a2c65a9d0568b6737c02b54f00b80716a53fac61 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 27 Oct 2022 17:54:39 +0300 Subject: [PATCH 047/239] net: dsa: fall back to default tagger if we can't load the one from DT DSA tagging protocol drivers can be changed at runtime through sysfs and at probe time through the device tree (support for the latter was added later). When changing through sysfs, it is assumed that the module for the new tagging protocol was already loaded into the kernel (in fact this is only a concern for Ocelot/Felix switches, where we have tag_ocelot.ko and tag_ocelot_8021q.ko; for every other switch, the default and alternative protocols are compiled within the same .ko, so there is nothing for the user to load). The kernel cannot currently call request_module(), because it has no way of constructing the modalias name of the tagging protocol driver ("dsa_tag-%d", where the number is one of DSA_TAG_PROTO_*_VALUE). The device tree only contains the string name of the tagging protocol ("ocelot-8021q"), and the only mapping between the string and the DSA_TAG_PROTO_OCELOT_8021Q_VALUE is present in tag_ocelot_8021q.ko. So this is a chicken-and-egg situation and dsa_core.ko has nothing based on which it can automatically request the insertion of the module. As a consequence, if CONFIG_NET_DSA_TAG_OCELOT_8021Q is built as module, the switch will forever defer probing. The long-term solution is to make DSA call request_module() somehow, but that probably needs some refactoring. What we can do to keep operating with existing device tree blobs is to cancel the attempt to change the tagging protocol with the one specified there, and to remain operating with the default one. Depending on the situation, the default protocol might still allow some functionality (in the case of ocelot, it does), and it's better to have that than to fail to probe. Fixes: deff710703d8 ("net: dsa: Allow default tag protocol to be overridden from DT") Link: https://lore.kernel.org/lkml/20221027113248.420216-1-michael@walle.cc/ Reported-by: Heiko Thiery Reported-by: Michael Walle Signed-off-by: Vladimir Oltean Tested-by: Michael Walle Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221027145439.3086017-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index af0e2c0394ac3..e504a18fc1254 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1409,9 +1409,9 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, const char *user_protocol) { + const struct dsa_device_ops *tag_ops = NULL; struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; - const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol default_proto; /* Find out which protocol the switch would prefer. */ @@ -1434,10 +1434,17 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, } tag_ops = dsa_find_tagger_by_name(user_protocol); - } else { - tag_ops = dsa_tag_driver_get(default_proto); + if (IS_ERR(tag_ops)) { + dev_warn(ds->dev, + "Failed to find a tagging driver for protocol %s, using default\n", + user_protocol); + tag_ops = NULL; + } } + if (!tag_ops) + tag_ops = dsa_tag_driver_get(default_proto); + if (IS_ERR(tag_ops)) { if (PTR_ERR(tag_ops) == -ENOPROTOOPT) return -EPROBE_DEFER; From 2f321fd6d89ad1e9525f5aa1f2be9202c2f3e724 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Thu, 22 Sep 2022 18:29:18 +0200 Subject: [PATCH 048/239] arm64: dts: verdin-imx8mp: fix ctrl_sleep_moci The GPIO signaling ctrl_sleep_moci is currently handled as a gpio hog. But the gpio-hog node is made a child of the wrong gpio controller. Move it to the node representing gpio4 so that it actually works. Without this carrier board components jumpered to use the signal are unconditionally switched off. Fixes: a39ed23bdf6e ("arm64: dts: freescale: add initial support for verdin imx8m plus") Signed-off-by: Max Krummenacher Signed-off-by: Marcel Ziswiler Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-verdin.dtsi | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi index 7b712d1888ead..5dcd1de586b52 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi @@ -354,16 +354,6 @@ "SODIMM_82", "SODIMM_70", "SODIMM_72"; - - ctrl-sleep-moci-hog { - gpio-hog; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpios = <29 GPIO_ACTIVE_HIGH>; - line-name = "CTRL_SLEEP_MOCI#"; - output-high; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ctrl_sleep_moci>; - }; }; &gpio3 { @@ -432,6 +422,16 @@ "SODIMM_256", "SODIMM_48", "SODIMM_44"; + + ctrl-sleep-moci-hog { + gpio-hog; + /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ + gpios = <29 GPIO_ACTIVE_HIGH>; + line-name = "CTRL_SLEEP_MOCI#"; + output-high; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ctrl_sleep_moci>; + }; }; /* On-module I2C */ From e1ec45b9a8127d9d31bb9fc1d802571a2ba8dd89 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:32 +0800 Subject: [PATCH 049/239] arm64: dts: imx8mm: remove otg1/2 power domain dependency on hsio pgc_otg1/2 are independent power domain of hsio, they for usb phy, so remove hsio power domain dependency from its node. Fixes: d39d4bb15310 ("arm64: dts: imx8mm: add GPC node") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index afb90f59c83c5..41204b871f4f3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -674,13 +674,11 @@ pgc_otg1: power-domain@2 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_otg2: power-domain@3 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_gpumix: power-domain@4 { From 4585c79ff477f9517b7f384a4fce351417e8fa36 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:33 +0800 Subject: [PATCH 050/239] arm64: dts: imx8mm: correct usb power domains pgc_otg1/2 is actual the power domain of usb PHY, usb controller is in hsio power domain, and pgc_otg1/2 is required to be powered up to detect usb remote wakeup, so move the pgc_otg1/2 power domain to the usb phy node. Fixes: 01df28d80859 ("arm64: dts: imx8mm: put USB controllers into power-domains") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 41204b871f4f3..dabd94dc30c4b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -276,6 +276,7 @@ assigned-clocks = <&clk IMX8MM_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg1>; }; usbphynop2: usbphynop2 { @@ -285,6 +286,7 @@ assigned-clocks = <&clk IMX8MM_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg2>; }; soc: soc@0 { @@ -1184,7 +1186,7 @@ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_500M>; phys = <&usbphynop1>; fsl,usbmisc = <&usbmisc1 0>; - power-domains = <&pgc_otg1>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; @@ -1204,7 +1206,7 @@ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_500M>; phys = <&usbphynop2>; fsl,usbmisc = <&usbmisc2 0>; - power-domains = <&pgc_otg2>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; From 9e0bbb7a5218d856f1ccf8f1bf38c8869572b464 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:34 +0800 Subject: [PATCH 051/239] arm64: dts: imx8mn: remove otg1 power domain dependency on hsio pgc_otg1 is an independent power domain of hsio, it's for usb phy, so remove hsio power domain from its node. Fixes: 8b8ebec67360 ("arm64: dts: imx8mn: add GPC node") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index cb2836bfbd95c..950f432627fe8 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -662,7 +662,6 @@ pgc_otg1: power-domain@1 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_gpumix: power-domain@2 { From ee895139a761bdb7869f9f5b9ccc19a064d0d740 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:35 +0800 Subject: [PATCH 052/239] arm64: dts: imx8mn: Correct the usb power domain pgc_otg1 is actual the power domain of usb PHY, usb controller is in hsio power domain, and pgc_otg1 is required to be powered up to detect usb remote wakeup, so move the pgc_otg1 power domain to the usb phy node. Fixes: ea2b5af58ab2 ("arm64: dts: imx8mn: put USB controller into power-domains") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 950f432627fe8..ad0b99adf6911 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1075,7 +1075,7 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>; phys = <&usbphynop1>; fsl,usbmisc = <&usbmisc1 0>; - power-domains = <&pgc_otg1>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; @@ -1174,5 +1174,6 @@ assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg1>; }; }; From bb5ad73941dc3f4e3c2241348f385da6501d50ea Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 29 Sep 2022 12:52:22 -0700 Subject: [PATCH 053/239] ARM: dts: imx6qdl-gw59{10,13}: fix user pushbutton GPIO offset The GW5910 and GW5913 have a user pushbutton that is tied to the Gateworks System Controller GPIO offset 2. Fix the invalid offset of 0. Fixes: 64bf0a0af18d ("ARM: dts: imx6qdl-gw: add Gateworks System Controller support") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw5910.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-gw5913.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi index 68e5ab2e27e22..6bb4855d13ce5 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi @@ -29,7 +29,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi index 8e23cec7149e5..696427b487f01 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi @@ -26,7 +26,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; From 5e67d47d0b010f0704aca469d6d27637b1dcb2ce Mon Sep 17 00:00:00 2001 From: Petr Benes Date: Tue, 4 Oct 2022 17:39:20 +0200 Subject: [PATCH 054/239] ARM: dts: imx6dl-yapp4: Do not allow PM to switch PU regulator off on Q/QP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix our design flaw in supply voltage distribution on the Quad and QuadPlus based boards. The problem is that we supply the SoC cache (VDD_CACHE_CAP) from VDD_PU instead of VDD_SOC. The VDD_PU internal regulator can be disabled by PM if VPU or GPU is not used. If that happens the system freezes. To prevent that configure the reg_pu regulator to be always on. Fixes: 0de4ab81ab26 ("ARM: dts: imx6dl-yapp4: Add Y Soft IOTA Crux/Crux+ board") Cc: petrben@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Petr Benes Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-yapp4-crux.dts | 4 ++++ arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/imx6q-yapp4-crux.dts b/arch/arm/boot/dts/imx6q-yapp4-crux.dts index 15f4824a5142a..bddf3822ebf73 100644 --- a/arch/arm/boot/dts/imx6q-yapp4-crux.dts +++ b/arch/arm/boot/dts/imx6q-yapp4-crux.dts @@ -33,6 +33,10 @@ status = "okay"; }; +®_pu { + regulator-always-on; +}; + ®_usb_h1_vbus { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts b/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts index cea165f2161a3..afaf4a6759d4b 100644 --- a/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts +++ b/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts @@ -33,6 +33,10 @@ status = "okay"; }; +®_pu { + regulator-always-on; +}; + ®_usb_h1_vbus { status = "okay"; }; From 06acb824d7d00a30e9400f67eee481b218371b5a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 10 Oct 2022 18:07:47 +0800 Subject: [PATCH 055/239] arm64: dts: imx8: correct clock order Per bindings/mmc/fsl-imx-esdhc.yaml, the clock order is ipg, ahb, per, otherwise warning: " mmc@5b020000: clock-names:1: 'ahb' was expected mmc@5b020000: clock-names:2: 'per' was expected " Fixes: 16c4ea7501b1 ("arm64: dts: imx8: switch to new lpcg clock binding") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8-ss-conn.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 82a1c44883786..10370d1a6c6de 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -38,9 +38,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc0_lpcg IMX_LPCG_CLK_0>, + <&sdhc0_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; }; @@ -49,9 +49,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc1_lpcg IMX_LPCG_CLK_0>, + <&sdhc1_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; fsl,tuning-step = <2>; @@ -62,9 +62,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc2_lpcg IMX_LPCG_CLK_0>, + <&sdhc2_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; }; From ef370d8ceec62322dee24c960af8ca67a749f34d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 10 Oct 2022 18:09:58 +0800 Subject: [PATCH 056/239] dt-bindings: power: gpcv2: add power-domains property Some pgc power-domain requires a parent power domain, so add an optional power-domains property, otherwise there will be dt check warning: gpc@303a0000: pgc:power-domain@1: 'power-domains' does not match any of the regexes: 'pinctrl-[0-9]+' Fixes: 30af8513bdb5 ("dt-bindings: power: add defines for i.MX8MM power domains") Signed-off-by: Peng Fan Acked-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml index 58022ae7d5ddc..dfdb8dfb6b653 100644 --- a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml +++ b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml @@ -81,6 +81,9 @@ properties: power-supply: true + power-domains: + maxItems: 1 + resets: description: | A number of phandles to resets that need to be asserted during From 82ce591967517b733c6e6e6882b5096d239b3afe Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 20 Oct 2022 18:16:56 +0800 Subject: [PATCH 057/239] arm64: dts: imx93: correct s4mu interrupt names Per binding doc, interrupt names should be tx and rx. Fixes: 0dfb380d2492 ("arm64: dts: imx93: add s4 mu node") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 3a5713bb4880e..55a70ee6f91af 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -501,7 +501,7 @@ reg = <0x47520000 0x10000>; interrupts = , ; - interrupt-names = "txirq", "rxirq"; + interrupt-names = "tx", "rx"; #mbox-cells = <2>; }; From d92a110130d492bd5eab81827ce3730581dc933a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 20 Oct 2022 18:16:57 +0800 Subject: [PATCH 058/239] arm64: dts: imx93: correct gpio-ranges Per imx93-pinfunc.h and pinctrl-imx93.c, correct gpio-ranges. Fixes: ec8b5b5058ea ("arm64: dts: freescale: Add i.MX93 dtsi support") Reported-by: David Wolfe Reviewed-by: Haibo Chen Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 55a70ee6f91af..0247866fc86b0 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -451,7 +451,7 @@ clocks = <&clk IMX93_CLK_GPIO2_GATE>, <&clk IMX93_CLK_GPIO2_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 32 32>; + gpio-ranges = <&iomuxc 0 4 30>; }; gpio3: gpio@43820080 { @@ -465,7 +465,8 @@ clocks = <&clk IMX93_CLK_GPIO3_GATE>, <&clk IMX93_CLK_GPIO3_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 64 32>; + gpio-ranges = <&iomuxc 0 84 8>, <&iomuxc 8 66 18>, + <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; gpio4: gpio@43830080 { @@ -479,7 +480,7 @@ clocks = <&clk IMX93_CLK_GPIO4_GATE>, <&clk IMX93_CLK_GPIO4_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 96 32>; + gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; gpio1: gpio@47400080 { @@ -493,7 +494,7 @@ clocks = <&clk IMX93_CLK_GPIO1_GATE>, <&clk IMX93_CLK_GPIO1_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 0 32>; + gpio-ranges = <&iomuxc 0 92 16>; }; s4muap: mailbox@47520000 { From 0ba7b623f15d52fa056eca26573d8cf1b9c29fd1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 22 Oct 2022 08:08:55 +0200 Subject: [PATCH 059/239] soc: imx: imx93-pd: Fix the error handling path of imx93_pd_probe() In imx93_pd_probe(); if an error occurs, some resources need to be released as done in the remove function. Fixes: 0a0f7cc25d4a ("soc: imx: add i.MX93 SRC power domain driver") Signed-off-by: Christophe JAILLET Signed-off-by: Shawn Guo --- drivers/soc/imx/imx93-pd.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/imx93-pd.c b/drivers/soc/imx/imx93-pd.c index 1f3d7039c1de9..4d235c8c4924d 100644 --- a/drivers/soc/imx/imx93-pd.c +++ b/drivers/soc/imx/imx93-pd.c @@ -135,11 +135,24 @@ static int imx93_pd_probe(struct platform_device *pdev) ret = pm_genpd_init(&domain->genpd, NULL, domain->init_off); if (ret) - return ret; + goto err_clk_unprepare; platform_set_drvdata(pdev, domain); - return of_genpd_add_provider_simple(np, &domain->genpd); + ret = of_genpd_add_provider_simple(np, &domain->genpd); + if (ret) + goto err_genpd_remove; + + return 0; + +err_genpd_remove: + pm_genpd_remove(&domain->genpd); + +err_clk_unprepare: + if (!domain->init_off) + clk_bulk_disable_unprepare(domain->num_clks, domain->clks); + + return ret; } static const struct of_device_id imx93_pd_ids[] = { From c126a0abc5dadd7df236f20aae6d8c3d103f095c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:15 +0300 Subject: [PATCH 060/239] arm64: dts: lx2160a: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: 6e1b8fae892d ("arm64: dts: lx2160a: add emdio1 node") Fixes: 5705b9dcda57 ("arm64: dts: lx2160a: add emdio2 node") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 6680fb2a6dc92..8c76d86cb7566 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -1385,6 +1385,9 @@ #address-cells = <1>; #size-cells = <0>; little-endian; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; @@ -1395,6 +1398,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; From d78a57426e64fc4c61e6189e450a0432d24536ca Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:16 +0300 Subject: [PATCH 061/239] arm64: dts: ls1088a: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: bbe75af7b092 ("arm64: dts: ls1088a: add external MDIO device nodes") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 421d879013d7f..260d045dbd9a8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -779,6 +779,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(1)>; status = "disabled"; }; @@ -788,6 +791,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(1)>; status = "disabled"; }; From d5c921a53c80dfa942f6dff36253db5a50775a5f Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:17 +0300 Subject: [PATCH 062/239] arm64: dts: ls208xa: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: 0420dde30a90 ("arm64: dts: ls208xa: add the external MDIO nodes") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index f1b9cc8714dc0..348d9e3a91252 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -532,6 +532,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; @@ -541,6 +544,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; From bfab00b94bd8569cdb84a6511d6615e6a8104e9c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 26 Sep 2022 22:37:52 +0200 Subject: [PATCH 063/239] drm/rockchip: dw_hdmi: filter regulator -EPROBE_DEFER error messages When the avdd-0v9 or avdd-1v8 supply are not yet available, EPROBE_DEFER is returned by rockchip_hdmi_parse_dt(). This causes the following error message to be printed multiple times: dwhdmi-rockchip fe0a0000.hdmi: [drm:dw_hdmi_rockchip_bind [rockchipdrm]] *ERROR* Unable to parse OF data Fix that by not printing the message when rockchip_hdmi_parse_dt() returns -EPROBE_DEFER. Fixes: ca80c4eb4b01 ("drm/rockchip: dw_hdmi: add regulator support") Signed-off-by: Aurelien Jarno Reviewed-by: Dmitry Osipenko Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20220926203752.5430-1-aurelien@aurel32.net --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index c14f888938688..2f4b8f64cbad3 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -565,7 +565,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, ret = rockchip_hdmi_parse_dt(hdmi); if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); return ret; } From 0be67e0556e469c57100ffe3c90df90abc796f3b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 17:03:48 -0700 Subject: [PATCH 064/239] drm/rockchip: dsi: Clean up 'usage_mode' when failing to attach If we fail to attach the first time (especially: EPROBE_DEFER), we fail to clean up 'usage_mode', and thus will fail to attach on any subsequent attempts, with "dsi controller already in use". Re-set to DW_DSI_USAGE_IDLE on attach failure. This is especially common to hit when enabling asynchronous probe on a duel-DSI system (such as RK3399 Gru/Scarlet), such that we're more likely to fail dw_mipi_dsi_rockchip_find_second() the first time. Fixes: 71f68fe7f121 ("drm/rockchip: dsi: add ability to work as a phy instead of full dsi") Cc: Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221019170255.1.Ia68dfb27b835d31d22bfe23812baf366ee1c6eac@changeid --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index bf6948125b841..d222c68112076 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1051,23 +1051,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data, if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n", ret); - return ret; + goto out; } second = dw_mipi_dsi_rockchip_find_second(dsi); - if (IS_ERR(second)) - return PTR_ERR(second); + if (IS_ERR(second)) { + ret = PTR_ERR(second); + goto out; + } if (second) { ret = component_add(second, &dw_mipi_dsi_rockchip_ops); if (ret) { DRM_DEV_ERROR(second, "Failed to register component: %d\n", ret); - return ret; + goto out; } } return 0; + +out: + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + return ret; } static int dw_mipi_dsi_rockchip_host_detach(void *priv_data, From 81e592f86f7afdb76d655e7fbd7803d7b8f985d8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 17:03:49 -0700 Subject: [PATCH 065/239] drm/rockchip: dsi: Force synchronous probe We can't safely probe a dual-DSI display asynchronously (driver_async_probe='*' or driver_async_probe='dw-mipi-dsi-rockchip' cmdline), because dw_mipi_dsi_rockchip_find_second() pokes one DSI device's drvdata from the other device without any locking. Request synchronous probe, at least until this driver learns some appropriate locking for dual-DSI initialization. Cc: Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221019170255.2.I6b985b0ca372b7e35c6d9ea970b24bcb262d4fc1@changeid --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index d222c68112076..528ddce144e54 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1689,5 +1689,11 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", + /* + * For dual-DSI display, one DSI pokes at the other DSI's + * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not + * safe for asynchronous probe. + */ + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; From ab78c74cfc5a3caa2bbb7627cb8f3bca40bb5fb0 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 20 Oct 2022 19:12:47 +0100 Subject: [PATCH 066/239] drm/rockchip: fix fbdev on non-IOMMU devices When switching to the generic fbdev infrastructure, it was missed that framebuffers were created with the alloc_kmap parameter to rockchip_gem_create_object() set to true. The generic infrastructure calls this via the .dumb_create() driver operation and thus creates a buffer without an associated kmap. alloc_kmap only makes a difference on devices without an IOMMU, but when it is missing rockchip_gem_prime_vmap() fails and the framebuffer cannot be used. Detect the case where a buffer is being allocated for the framebuffer and ensure a kernel mapping is created in this case. Fixes: 24af7c34b290 ("drm/rockchip: use generic fbdev setup") Reported-by: Johan Jonker Cc: Thomas Zimmermann Signed-off-by: John Keeping Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221020181248.2497065-1-john@metanate.com --- drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 614e97aaac805..da8a69953706d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -364,9 +364,12 @@ rockchip_gem_create_with_handle(struct drm_file *file_priv, { struct rockchip_gem_object *rk_obj; struct drm_gem_object *obj; + bool is_framebuffer; int ret; - rk_obj = rockchip_gem_create_object(drm, size, false); + is_framebuffer = drm->fb_helper && file_priv == drm->fb_helper->client.file; + + rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer); if (IS_ERR(rk_obj)) return ERR_CAST(rk_obj); From 553c5a429aee26c9cfaf37ae158a8915540270fe Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 23 Oct 2022 18:07:47 +0200 Subject: [PATCH 067/239] drm/rockchip: dsi: Fix VOP selection on SoCs that support it lcdsel_grf_reg is defined as u32, so "< 0" comaprison is always false, which breaks VOP selection on eg. RK3399. Compare against 0. Fixes: f3aaa6125b6f ("drm/rockchip: dsi: add rk3568 support") Signed-off-by: Ondrej Jirman Tested-by: Chris Morgan Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221023160747.607943-1-megi@xff.cz --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 528ddce144e54..f4df9820b295d 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -752,7 +752,7 @@ static void dw_mipi_dsi_rockchip_config(struct dw_mipi_dsi_rockchip *dsi) static void dw_mipi_dsi_rockchip_set_lcdsel(struct dw_mipi_dsi_rockchip *dsi, int mux) { - if (dsi->cdata->lcdsel_grf_reg < 0) + if (dsi->cdata->lcdsel_grf_reg) regmap_write(dsi->grf_regmap, dsi->cdata->lcdsel_grf_reg, mux ? dsi->cdata->lcdsel_lit : dsi->cdata->lcdsel_big); } @@ -1643,7 +1643,6 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { { .reg = 0xfe060000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON2, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS | RK3568_DSI0_FORCETXSTOPMODE | @@ -1653,7 +1652,6 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { }, { .reg = 0xfe070000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON3, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS | RK3568_DSI1_FORCETXSTOPMODE | From 471bf2406c043491b1a8288e5f04bc278f7d7ca1 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Fri, 28 Oct 2022 11:52:05 +0200 Subject: [PATCH 068/239] drm/rockchip: vop2: fix null pointer in plane_atomic_disable If the vop2_plane_atomic_disable function is called with NULL as a state, accessing the old_pstate runs into a null pointer exception. However, the drm_atomic_helper_disable_planes_on_crtc function calls the atomic_disable callback with state NULL. Allow to disable a plane without passing a plane state by checking the old_pstate only if a state is passed. Signed-off-by: Michael Tretter Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221028095206.2136601-2-m.tretter@pengutronix.de --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index aac20be5ac082..26f8a8489ded4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -996,13 +996,15 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, static void vop2_plane_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { - struct drm_plane_state *old_pstate = drm_atomic_get_old_plane_state(state, plane); + struct drm_plane_state *old_pstate = NULL; struct vop2_win *win = to_vop2_win(plane); struct vop2 *vop2 = win->vop2; drm_dbg(vop2->drm, "%s disable\n", win->data->name); - if (!old_pstate->crtc) + if (state) + old_pstate = drm_atomic_get_old_plane_state(state, plane); + if (old_pstate && !old_pstate->crtc) return; vop2_win_disable(win); From 447fb14bf07905b880c9ed1ea92c53d6dd0649d7 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Fri, 28 Oct 2022 11:52:06 +0200 Subject: [PATCH 069/239] drm/rockchip: vop2: disable planes when disabling the crtc The vop2 driver needs to explicitly disable the planes if the crtc is disabled. Unless the planes are explicitly disabled, the address of the last framebuffer is kept in the registers of the VOP2. When re-enabling the encoder after it has been disabled by the driver, the VOP2 will start and read the framebuffer that has been freed but is still pointed to by the register. The iommu will catch these read accesses and print errors. Explicitly disable the planes when the crtc is disabled to reset the registers. Signed-off-by: Michael Tretter Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221028095206.2136601-3-m.tretter@pengutronix.de --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 26f8a8489ded4..105a548d0abeb 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -877,10 +877,14 @@ static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, { struct vop2_video_port *vp = to_vop2_video_port(crtc); struct vop2 *vop2 = vp->vop2; + struct drm_crtc_state *old_crtc_state; int ret; vop2_lock(vop2); + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false); + drm_crtc_vblank_off(crtc); /* From 88e1f16ba58665e9edfce437ea487da2fa759af9 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 1 Sep 2022 03:10:07 +0000 Subject: [PATCH 070/239] ksefltests: pidfd: Fix wait_states: Test terminated by timeout 0Day/LKP observed that the kselftest blocks forever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); wait_states has below testing flow: CHILD PARENT ---------------+-------------- 1 STOP itself 2 WAIT for CHILD STOPPED 3 SIGNAL CHILD to CONT 4 CONT 5 STOP itself 5' WAIT for CHILD CONT 6 WAIT for CHILD STOPPED The problem is that the kernel cannot ensure the order of 5 and 5', once 5 goes first, the test will fail. we can reproduce it by: $ while true; do make run_tests -C pidfd; done Introduce a blocking read in child process to make sure the parent can check its WCONTINUED. CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_wait.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 070c1c876df15..c3e2a3041f55f 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -95,20 +95,28 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; + int pfd[2]; pid_t pid; siginfo_t info = { .si_signo = 0, }; + ASSERT_EQ(pipe(pfd), 0); pid = sys_clone3(&args); ASSERT_GE(pid, 0); if (pid == 0) { + char buf[2]; + + close(pfd[1]); kill(getpid(), SIGSTOP); + ASSERT_EQ(read(pfd[0], buf, 1), 1); + close(pfd[0]); kill(getpid(), SIGSTOP); exit(EXIT_SUCCESS); } + close(pfd[0]); ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); @@ -117,6 +125,8 @@ TEST(wait_states) ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(write(pfd[1], "C", 1), 1); + close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); From 3d982441308ebdf713771c8a85c23d9b8b66b4d4 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 1 Sep 2022 03:17:34 +0000 Subject: [PATCH 071/239] selftests: pidfd: Fix compling warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings and enable Wall. pidfd_wait.c: In function ‘wait_nonblock’: pidfd_wait.c:150:13: warning: unused variable ‘status’ [-Wunused-variable] 150 | int pidfd, status = 0; | ^~~~~~ ... pidfd_test.c: In function ‘child_poll_exec_test’: pidfd_test.c:438:1: warning: no return statement in function returning non-void [-Wreturn-type] 438 | } | ^ Signed-off-by: Li Zhijian v2: fix mistake assignment to pidfd Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd_test.c | 2 ++ tools/testing/selftests/pidfd/pidfd_wait.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index f4a2f28f926bb..778b6cdc8aed8 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -pthread +CFLAGS += -g -I../../../../usr/include/ -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 9a2d64901d591..d36654265b7a5 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -435,6 +435,8 @@ static int child_poll_exec_test(void *args) */ while (1) sleep(1); + + return 0; } static void test_pidfd_poll_exec(int use_waitpid) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index c3e2a3041f55f..0dcb8365ddc39 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -148,7 +148,7 @@ TEST(wait_states) TEST(wait_nonblock) { - int pidfd, status = 0; + int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; struct clone_args args = { From 8e4aae6b8ca76afb1fb64dcb24be44ba814e7f8a Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:29 +0800 Subject: [PATCH 072/239] nfc: fdp: Fix potential memory leak in fdp_nci_send() fdp_nci_send() will call fdp_nci_i2c_write that will not free skb in the function. As a result, when fdp_nci_i2c_write() finished, the skb will memleak. fdp_nci_send() should free skb after fdp_nci_i2c_write() finished. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index c6b3334f24c9e..f12f903a9dd13 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -249,11 +249,19 @@ static int fdp_nci_close(struct nci_dev *ndev) static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); + int ret; if (atomic_dec_and_test(&info->data_pkt_counter)) info->data_pkt_counter_cb(ndev); - return info->phy_ops->write(info->phy, skb); + ret = info->phy_ops->write(info->phy, skb); + if (ret < 0) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return 0; } static int fdp_nci_request_firmware(struct nci_dev *ndev) From 7bf1ed6aff0f70434bd0cdd45495e83f1dffb551 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:30 +0800 Subject: [PATCH 073/239] nfc: nxp-nci: Fix potential memory leak in nxp_nci_send() nxp_nci_send() will call nxp_nci_i2c_write(), and only free skb when nxp_nci_i2c_write() failed. However, even if the nxp_nci_i2c_write() run succeeds, the skb will not be freed in nxp_nci_i2c_write(). As the result, the skb will memleak. nxp_nci_send() should also free the skb when nxp_nci_i2c_write() succeeds. Fixes: dece45855a8b ("NFC: nxp-nci: Add support for NXP NCI chips") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 7c93d484dc1bc..580cb6ecffee4 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -80,10 +80,13 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return -EINVAL; r = info->phy_ops->write(info->phy_id, skb); - if (r < 0) + if (r < 0) { kfree_skb(skb); + return r; + } - return r; + consume_skb(skb); + return 0; } static int nxp_nci_rf_pll_unlocked_ntf(struct nci_dev *ndev, From 3a146b7e3099dc7cf3114f627d9b79291e2d2203 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:31 +0800 Subject: [PATCH 074/239] nfc: s3fwrn5: Fix potential memory leak in s3fwrn5_nci_send() s3fwrn5_nci_send() will call s3fwrn5_i2c_write() or s3fwrn82_uart_write(), and free the skb if write() failed. However, even if the write() run succeeds, the skb will not be freed in write(). As the result, the skb will memleak. s3fwrn5_nci_send() should also free the skb when write() succeeds. Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 1c412007fabb6..0270e05b68dff 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -110,11 +110,15 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) } ret = s3fwrn5_write(info, skb); - if (ret < 0) + if (ret < 0) { kfree_skb(skb); + mutex_unlock(&info->mutex); + return ret; + } + consume_skb(skb); mutex_unlock(&info->mutex); - return ret; + return 0; } static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) From 93d904a734a74c54d945a9884b4962977f1176cd Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:32 +0800 Subject: [PATCH 075/239] nfc: nfcmrvl: Fix potential memory leak in nfcmrvl_i2c_nci_send() nfcmrvl_i2c_nci_send() will be called by nfcmrvl_nci_send(), and skb should be freed in nfcmrvl_i2c_nci_send(). However, nfcmrvl_nci_send() will only free skb when i2c_master_send() return >=0, which means skb will memleak when i2c_master_send() failed. Free skb no matter whether i2c_master_send() succeeds. Fixes: b5b3e23e4cac ("NFC: nfcmrvl: add i2c driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/i2c.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index acef0cfd76af3..24436c9e54c99 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -132,10 +132,15 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, ret = -EREMOTEIO; } else ret = 0; + } + + if (ret) { kfree_skb(skb); + return ret; } - return ret; + consume_skb(skb); + return 0; } static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, From 2153fc9623e5465f503d793d4c94ad65e9ec9b5f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 27 Oct 2022 23:56:26 +1100 Subject: [PATCH 076/239] powerpc/64e: Fix amdgpu build on Book3E w/o AltiVec There's a build failure for Book3E without AltiVec: Error: cc1: error: AltiVec not supported in this target make[6]: *** [/linux/scripts/Makefile.build:250: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o] Error 1 This happens because the amdgpu build is only gated by PPC_LONG_DOUBLE_128, but that symbol can be enabled even though AltiVec is disabled. The only user of PPC_LONG_DOUBLE_128 is amdgpu, so just add a dependency on AltiVec to that symbol to fix the build. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221027125626.1383092-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2fc..20fb1765238c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -285,7 +285,7 @@ config PPC # config PPC_LONG_DOUBLE_128 - depends on PPC64 + depends on PPC64 && ALTIVEC def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1) config PPC_BARRIER_NOSPEC From 6fdaed8c79887680bc46cb0a51775bd7c8645528 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 27 Oct 2022 22:57:11 +0900 Subject: [PATCH 077/239] drm/format-helper: Only advertise supported formats for conversion drm_fb_build_fourcc_list() currently returns all emulated formats unconditionally as long as the native format is among them, even though not all combinations have conversion helpers. Although the list is arguably provided to userspace in precedence order, userspace can pick something out-of-order (and thus break when it shouldn't), or simply only support a format that is unsupported (and thus think it can work, which results in the appearance of a hang as FB blits fail later on, instead of the initialization error you'd expect in this case). Add checks to filter the list of emulated formats to only those supported for conversion to the native format. This presumes that there is a single native format (only the first is checked, if there are multiple). Refactoring this API to drop the native list or support it properly (by returning the appropriate emulated->native mapping table) is left for a future patch. The simpledrm driver is left as-is with a full table of emulated formats. This keeps all currently working conversions available and drops all the broken ones (i.e. this a strict bugfix patch, adding no new supported formats nor removing any actually working ones). In order to avoid proliferation of emulated formats, future drivers should advertise only XRGB8888 as the sole emulated format (since some userspace assumes its presence). This fixes a real user regression where the ?RGB2101010 support commit started advertising it unconditionally where not supported, and KWin decided to start to use it over the native format and broke, but also the fixes the spurious RGB565/RGB888 formats which have been wrongly unconditionally advertised since the dawn of simpledrm. Fixes: 6ea966fca084 ("drm/simpledrm: Add [AX]RGB2101010 formats") Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Acked-by: Pekka Paalanen Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221027135711.24425-1-marcan@marcan.st --- drivers/gpu/drm/drm_format_helper.c | 66 ++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index e2f76621453c7..3ee59bae9d2ff 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -807,6 +807,38 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t return false; } +static const uint32_t conv_from_xrgb8888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, +}; + +static const uint32_t conv_from_rgb565_888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static bool is_conversion_supported(uint32_t from, uint32_t to) +{ + switch (from) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return is_listed_fourcc(conv_from_xrgb8888, ARRAY_SIZE(conv_from_xrgb8888), to); + case DRM_FORMAT_RGB565: + case DRM_FORMAT_RGB888: + return is_listed_fourcc(conv_from_rgb565_888, ARRAY_SIZE(conv_from_rgb565_888), to); + case DRM_FORMAT_XRGB2101010: + return to == DRM_FORMAT_ARGB2101010; + case DRM_FORMAT_ARGB2101010: + return to == DRM_FORMAT_XRGB2101010; + default: + return false; + } +} + /** * drm_fb_build_fourcc_list - Filters a list of supported color formats against * the device's native formats @@ -827,7 +859,9 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t * be handed over to drm_universal_plane_init() et al. Native formats * will go before emulated formats. Other heuristics might be applied * to optimize the order. Formats near the beginning of the list are - * usually preferred over formats near the end of the list. + * usually preferred over formats near the end of the list. Formats + * without conversion helpers will be skipped. New drivers should only + * pass in XRGB8888 and avoid exposing additional emulated formats. * * Returns: * The number of color-formats 4CC codes returned in @fourccs_out. @@ -839,7 +873,7 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, { u32 *fourccs = fourccs_out; const u32 *fourccs_end = fourccs_out + nfourccs_out; - bool found_native = false; + uint32_t native_format = 0; size_t i; /* @@ -858,26 +892,18 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc); - if (!found_native) - found_native = is_listed_fourcc(driver_fourccs, driver_nfourccs, fourcc); + /* + * There should only be one native format with the current API. + * This API needs to be refactored to correctly support arbitrary + * sets of native formats, since it needs to report which native + * format to use for each emulated format. + */ + if (!native_format) + native_format = fourcc; *fourccs = fourcc; ++fourccs; } - /* - * The plane's atomic_update helper converts the framebuffer's color format - * to a native format when copying to device memory. - * - * If there is not a single format supported by both, device and - * driver, the native formats are likely not supported by the conversion - * helpers. Therefore *only* support the native formats and add a - * conversion helper ASAP. - */ - if (!found_native) { - drm_warn(dev, "Format conversion helpers required to add extra formats.\n"); - goto out; - } - /* * The extra formats, emulated by the driver, go second. */ @@ -890,6 +916,9 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, } else if (fourccs == fourccs_end) { drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc); continue; /* end of available output buffer */ + } else if (!is_conversion_supported(fourcc, native_format)) { + drm_dbg_kms(dev, "Unsupported emulated format %p4cc\n", &fourcc); + continue; /* format is not supported for conversion */ } drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc); @@ -898,7 +927,6 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, ++fourccs; } -out: return fourccs - fourccs_out; } EXPORT_SYMBOL(drm_fb_build_fourcc_list); From 06a4df5863f73af193a4ff7abf7cb04058584f06 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 28 Oct 2022 10:09:11 +0800 Subject: [PATCH 078/239] net: fec: fix improper use of NETDEV_TX_BUSY The ndo_start_xmit() method must not free skb when returning NETDEV_TX_BUSY, since caller is going to requeue freed skb. Fix it by returning NETDEV_TX_OK in case of dma_map_single() fails. Fixes: 79f339125ea3 ("net: fec: Add software TSO support") Signed-off-by: Zhang Changzhong Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 28ef4d3c18789..f623c12eaf951 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -713,7 +713,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } bdp->cbd_datlen = cpu_to_fec16(size); @@ -775,7 +775,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } } From 171a93182eccd6e6835d2c86b40787f9f832efaa Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Oct 2022 00:07:06 +0300 Subject: [PATCH 079/239] ata: pata_legacy: fix pdc20230_set_piomode() Clang gives a warning when compiling pata_legacy.c with 'make W=1' about the 'rt' local variable in pdc20230_set_piomode() being set but unused. Quite obviously, there is an outb() call missing to write back the updated variable. Moreover, checking the docs by Petr Soucek revealed that bitwise AND should have been done with a negated timing mask and the master/slave timing masks were swapped while updating... Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.") Reported-by: Damien Le Moal Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 0a8bf09a5c19e..03c580625c2cc 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -315,9 +315,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) outb(inb(0x1F4) & 0x07, 0x1F4); rt = inb(0x1F3); - rt &= 0x07 << (3 * adev->devno); + rt &= ~(0x07 << (3 * !adev->devno)); if (pio) - rt |= (1 + 3 * pio) << (3 * adev->devno); + rt |= (1 + 3 * pio) << (3 * !adev->devno); + outb(rt, 0x1F3); udelay(100); outb(inb(0x1F2) | 0x01, 0x1F2); From 015618c3ec19584c83ff179fa631be8cec906aaf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 15:49:31 +0800 Subject: [PATCH 080/239] ata: palmld: fix return value check in palmld_pata_probe() If devm_platform_ioremap_resource() fails, it never return NULL pointer, replace the check with IS_ERR(). Fixes: 57bf0f5a162d ("ARM: pxa: use pdev resource for palmld mmio") Signed-off-by: Yang Yingliang Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_palmld.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c index 400e65190904f..51caa2a427dd8 100644 --- a/drivers/ata/pata_palmld.c +++ b/drivers/ata/pata_palmld.c @@ -63,8 +63,8 @@ static int palmld_pata_probe(struct platform_device *pdev) /* remap drive's physical memory address */ mem = devm_platform_ioremap_resource(pdev, 0); - if (!mem) - return -ENOMEM; + if (IS_ERR(mem)) + return PTR_ERR(mem); /* request and activate power and reset GPIOs */ lda->power = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH); From 8bdc2acd420c6f3dd1f1c78750ec989f02a1e2b9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Oct 2022 18:05:00 +0300 Subject: [PATCH 081/239] net: sched: Fix use after free in red_enqueue() We can't use "skb" again after passing it to qdisc_enqueue(). This is basically identical to commit 2f09707d0c97 ("sch_sfb: Also store skb len before calling child enqueue"). Fixes: d7f4f332f082 ("sch_red: update backlog as well") Signed-off-by: Dan Carpenter Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a5a401f93c1a2..98129324e1573 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; + unsigned int len; int ret; q->vars.qavg = red_calc_qavg(&q->parms, @@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } + len = qdisc_pkt_len(skb); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; From d7164a5048e8a6afe2cc4aaf7f12643c14e7f241 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 25 Oct 2022 14:44:55 +0300 Subject: [PATCH 082/239] drm/i915/tgl+: Add locking around DKL PHY register accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing the TypeC DKL PHY registers during modeset-commit, -verification, DP link-retraining and AUX power well toggling is racy due to these code paths being concurrent and the PHY register bank selection register (HIP_INDEX_REG) being shared between PHY instances (aka TC ports) and the bank selection being not atomic wrt. the actual PHY register access. Add the required locking around each PHY register bank selection-> register access sequence. Kudos to Ville for noticing the race conditions. v2: - Add the DKL PHY register accessors to intel_dkl_phy.[ch]. (Jani) - Make the DKL_REG_TC_PORT macro independent of PHY internals. - Move initing the DKL PHY lock to a more logical place. v3: - Fix parameter reuse in the DKL_REG_TC_PORT definition. - Document the usage of phy_lock. v4: - Fix adding TC_PORT_1 offset in the DKL_REG_TC_PORT definition. Cc: Ville Syrjälä Cc: Jani Nikula Cc: # v5.5+ Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20221025114457.2191004-1-imre.deak@intel.com (cherry picked from commit 89cb0ba4ceee6bed1059904859c5723b3f39da68) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_ddi.c | 68 +++++------ .../gpu/drm/i915/display/intel_display_core.h | 8 ++ .../i915/display/intel_display_power_well.c | 7 +- drivers/gpu/drm/i915/display/intel_dkl_phy.c | 109 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dkl_phy.h | 24 ++++ drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 59 +++++----- drivers/gpu/drm/i915/i915_driver.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 3 + 9 files changed, 204 insertions(+), 76 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.c create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a26edcdadc217..cea00aaca04b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -282,6 +282,7 @@ i915-y += \ display/intel_ddi.o \ display/intel_ddi_buf_trans.o \ display/intel_display_trace.o \ + display/intel_dkl_phy.o \ display/intel_dp.o \ display/intel_dp_aux.o \ display/intel_dp_aux_backlight.o \ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index da8472cdc1357..69ecf2a3d6c65 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -43,6 +43,7 @@ #include "intel_de.h" #include "intel_display_power.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dp.h" #include "intel_dp_link_training.h" #include "intel_dp_mst.h" @@ -1262,33 +1263,30 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder, for (ln = 0; ln < 2; ln++) { int level; - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, ln)); - - intel_de_write(dev_priv, DKL_TX_PMD_LANE_SUS(tc_port), 0); + intel_dkl_phy_write(dev_priv, DKL_TX_PMD_LANE_SUS(tc_port), ln, 0); level = intel_ddi_level(encoder, crtc_state, 2*ln+0); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL0(tc_port), - DKL_TX_PRESHOOT_COEFF_MASK | - DKL_TX_DE_EMPAHSIS_COEFF_MASK | - DKL_TX_VSWING_CONTROL_MASK, - DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | - DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | - DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL0(tc_port), ln, + DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK, + DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | + DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | + DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); level = intel_ddi_level(encoder, crtc_state, 2*ln+1); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL1(tc_port), - DKL_TX_PRESHOOT_COEFF_MASK | - DKL_TX_DE_EMPAHSIS_COEFF_MASK | - DKL_TX_VSWING_CONTROL_MASK, - DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | - DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | - DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL1(tc_port), ln, + DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK, + DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | + DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | + DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), - DKL_TX_DP20BITMODE, 0); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), ln, + DKL_TX_DP20BITMODE, 0); if (IS_ALDERLAKE_P(dev_priv)) { u32 val; @@ -1306,10 +1304,10 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder, val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(0); } - intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), - DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK | - DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, - val); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), ln, + DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK | + DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, + val); } } } @@ -2019,12 +2017,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, return; if (DISPLAY_VER(dev_priv) >= 12) { - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x0)); - ln0 = intel_de_read(dev_priv, DKL_DP_MODE(tc_port)); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x1)); - ln1 = intel_de_read(dev_priv, DKL_DP_MODE(tc_port)); + ln0 = intel_dkl_phy_read(dev_priv, DKL_DP_MODE(tc_port), 0); + ln1 = intel_dkl_phy_read(dev_priv, DKL_DP_MODE(tc_port), 1); } else { ln0 = intel_de_read(dev_priv, MG_DP_MODE(0, tc_port)); ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); @@ -2085,12 +2079,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, } if (DISPLAY_VER(dev_priv) >= 12) { - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x0)); - intel_de_write(dev_priv, DKL_DP_MODE(tc_port), ln0); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x1)); - intel_de_write(dev_priv, DKL_DP_MODE(tc_port), ln1); + intel_dkl_phy_write(dev_priv, DKL_DP_MODE(tc_port), 0, ln0); + intel_dkl_phy_write(dev_priv, DKL_DP_MODE(tc_port), 1, ln1); } else { intel_de_write(dev_priv, MG_DP_MODE(0, tc_port), ln0); intel_de_write(dev_priv, MG_DP_MODE(1, tc_port), ln1); @@ -3094,10 +3084,8 @@ static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder) enum tc_port tc_port = intel_port_to_tc(i915, encoder->port); int ln; - for (ln = 0; ln < 2; ln++) { - intel_de_write(i915, HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); - intel_de_rmw(i915, DKL_PCS_DW5(tc_port), DKL_PCS_DW5_CORE_SOFTRESET, 0); - } + for (ln = 0; ln < 2; ln++) + intel_dkl_phy_rmw(i915, DKL_PCS_DW5(tc_port), ln, DKL_PCS_DW5_CORE_SOFTRESET, 0); } static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 96cf994b0ad1f..9b51148e8ba56 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -315,6 +315,14 @@ struct intel_display { struct intel_global_obj obj; } dbuf; + struct { + /* + * dkl.phy_lock protects against concurrent access of the + * Dekel TypeC PHYs. + */ + spinlock_t phy_lock; + } dkl; + struct { /* VLV/CHV/BXT/GLK DSI MMIO register base address */ u32 mmio_base; diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index df7ee4969ef17..1d18eee562534 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -12,6 +12,7 @@ #include "intel_de.h" #include "intel_display_power_well.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dmc.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" @@ -529,11 +530,9 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, enum tc_port tc_port; tc_port = TGL_AUX_PW_TO_TC_PORT(i915_power_well_instance(power_well)->hsw.idx); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port), - DKL_CMN_UC_DW27_UC_HEALTH, 1)) + if (wait_for(intel_dkl_phy_read(dev_priv, DKL_CMN_UC_DW_27(tc_port), 2) & + DKL_CMN_UC_DW27_UC_HEALTH, 1)) drm_warn(&dev_priv->drm, "Timeout waiting TC uC health\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_dkl_phy.c b/drivers/gpu/drm/i915/display/intel_dkl_phy.c new file mode 100644 index 0000000000000..710b030c7ed54 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dkl_phy.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_reg.h" + +#include "intel_de.h" +#include "intel_display.h" +#include "intel_dkl_phy.h" + +static void +dkl_phy_set_hip_idx(struct drm_i915_private *i915, i915_reg_t reg, int idx) +{ + enum tc_port tc_port = DKL_REG_TC_PORT(reg); + + drm_WARN_ON(&i915->drm, tc_port < TC_PORT_1 || tc_port >= I915_MAX_TC_PORTS); + + intel_de_write(i915, + HIP_INDEX_REG(tc_port), + HIP_INDEX_VAL(tc_port, idx)); +} + +/** + * intel_dkl_phy_read - read a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * + * Read the @reg Dekel PHY register. + * + * Returns the read value. + */ +u32 +intel_dkl_phy_read(struct drm_i915_private *i915, i915_reg_t reg, int ln) +{ + u32 val; + + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + val = intel_de_read(i915, reg); + + spin_unlock(&i915->display.dkl.phy_lock); + + return val; +} + +/** + * intel_dkl_phy_write - write a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * @val: value to write + * + * Write @val to the @reg Dekel PHY register. + */ +void +intel_dkl_phy_write(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 val) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_write(i915, reg, val); + + spin_unlock(&i915->display.dkl.phy_lock); +} + +/** + * intel_dkl_phy_rmw - read-modify-write a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * @clear: mask to clear + * @set: mask to set + * + * Read the @reg Dekel PHY register, clearing then setting the @clear/@set bits in it, and writing + * this value back to the register if the value differs from the read one. + */ +void +intel_dkl_phy_rmw(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 clear, u32 set) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_rmw(i915, reg, clear, set); + + spin_unlock(&i915->display.dkl.phy_lock); +} + +/** + * intel_dkl_phy_posting_read - do a posting read from a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * + * Read the @reg Dekel PHY register without returning the read value. + */ +void +intel_dkl_phy_posting_read(struct drm_i915_private *i915, i915_reg_t reg, int ln) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_posting_read(i915, reg); + + spin_unlock(&i915->display.dkl.phy_lock); +} diff --git a/drivers/gpu/drm/i915/display/intel_dkl_phy.h b/drivers/gpu/drm/i915/display/intel_dkl_phy.h new file mode 100644 index 0000000000000..260ad121a0b18 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dkl_phy.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __INTEL_DKL_PHY_H__ +#define __INTEL_DKL_PHY_H__ + +#include + +#include "i915_reg_defs.h" + +struct drm_i915_private; + +u32 +intel_dkl_phy_read(struct drm_i915_private *i915, i915_reg_t reg, int ln); +void +intel_dkl_phy_write(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 val); +void +intel_dkl_phy_rmw(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 clear, u32 set); +void +intel_dkl_phy_posting_read(struct drm_i915_private *i915, i915_reg_t reg, int ln); + +#endif /* __INTEL_DKL_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e5fb66a5dd025..64dd603dc69aa 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -25,6 +25,7 @@ #include "intel_de.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" #include "intel_dpll_mgr.h" @@ -3508,15 +3509,12 @@ static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, * All registers read here have the same HIP_INDEX_REG even though * they are on different building blocks */ - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - - hw_state->mg_refclkin_ctl = intel_de_read(dev_priv, - DKL_REFCLKIN_CTL(tc_port)); + hw_state->mg_refclkin_ctl = intel_dkl_phy_read(dev_priv, + DKL_REFCLKIN_CTL(tc_port), 2); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_hsclkctl = - intel_de_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | @@ -3524,32 +3522,32 @@ static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; hw_state->mg_clktop2_coreclkctl1 = - intel_de_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; - hw_state->mg_pll_div0 = intel_de_read(dev_priv, DKL_PLL_DIV0(tc_port)); + hw_state->mg_pll_div0 = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV0(tc_port), 2); val = DKL_PLL_DIV0_MASK; if (dev_priv->display.vbt.override_afc_startup) val |= DKL_PLL_DIV0_AFC_STARTUP_MASK; hw_state->mg_pll_div0 &= val; - hw_state->mg_pll_div1 = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port)); + hw_state->mg_pll_div1 = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV1(tc_port), 2); hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK | DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); - hw_state->mg_pll_ssc = intel_de_read(dev_priv, DKL_PLL_SSC(tc_port)); + hw_state->mg_pll_ssc = intel_dkl_phy_read(dev_priv, DKL_PLL_SSC(tc_port), 2); hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | DKL_PLL_SSC_STEP_LEN_MASK | DKL_PLL_SSC_STEP_NUM_MASK | DKL_PLL_SSC_EN); - hw_state->mg_pll_bias = intel_de_read(dev_priv, DKL_PLL_BIAS(tc_port)); + hw_state->mg_pll_bias = intel_dkl_phy_read(dev_priv, DKL_PLL_BIAS(tc_port), 2); hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H | DKL_PLL_BIAS_FBDIV_FRAC_MASK); hw_state->mg_pll_tdc_coldst_bias = - intel_de_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK | DKL_PLL_TDC_FEED_FWD_GAIN_MASK); @@ -3737,61 +3735,58 @@ static void dkl_pll_write(struct drm_i915_private *dev_priv, * All registers programmed here have the same HIP_INDEX_REG even * though on different building block */ - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - /* All the registers are RMW */ - val = intel_de_read(dev_priv, DKL_REFCLKIN_CTL(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_REFCLKIN_CTL(tc_port), 2); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - intel_de_write(dev_priv, DKL_REFCLKIN_CTL(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_REFCLKIN_CTL(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - intel_de_write(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - intel_de_write(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2, val); val = DKL_PLL_DIV0_MASK; if (dev_priv->display.vbt.override_afc_startup) val |= DKL_PLL_DIV0_AFC_STARTUP_MASK; - intel_de_rmw(dev_priv, DKL_PLL_DIV0(tc_port), val, - hw_state->mg_pll_div0); + intel_dkl_phy_rmw(dev_priv, DKL_PLL_DIV0(tc_port), 2, val, + hw_state->mg_pll_div0); - val = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV1(tc_port), 2); val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK | DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); val |= hw_state->mg_pll_div1; - intel_de_write(dev_priv, DKL_PLL_DIV1(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_DIV1(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_SSC(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_SSC(tc_port), 2); val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | DKL_PLL_SSC_STEP_LEN_MASK | DKL_PLL_SSC_STEP_NUM_MASK | DKL_PLL_SSC_EN); val |= hw_state->mg_pll_ssc; - intel_de_write(dev_priv, DKL_PLL_SSC(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_SSC(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_BIAS(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_BIAS(tc_port), 2); val &= ~(DKL_PLL_BIAS_FRAC_EN_H | DKL_PLL_BIAS_FBDIV_FRAC_MASK); val |= hw_state->mg_pll_bias; - intel_de_write(dev_priv, DKL_PLL_BIAS(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_BIAS(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK | DKL_PLL_TDC_FEED_FWD_GAIN_MASK); val |= hw_state->mg_pll_tdc_coldst_bias; - intel_de_write(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2, val); - intel_de_posting_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + intel_dkl_phy_posting_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); } static void icl_pll_power_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index c459eb362c47f..f2a15d8155f4a 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -353,6 +353,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->display.wm.wm_mutex); mutex_init(&dev_priv->display.pps.mutex); mutex_init(&dev_priv->display.hdcp.comp_mutex); + spin_lock_init(&dev_priv->display.dkl.phy_lock); i915_memcpy_init_early(dev_priv); intel_runtime_pm_init_early(&dev_priv->runtime_pm); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0b287a59dc2f4..da35bb2db26b6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7420,6 +7420,9 @@ enum skl_power_gate { #define _DKL_PHY5_BASE 0x16C000 #define _DKL_PHY6_BASE 0x16D000 +#define DKL_REG_TC_PORT(__reg) \ + (TC_PORT_1 + ((__reg).reg - _DKL_PHY1_BASE) / (_DKL_PHY2_BASE - _DKL_PHY1_BASE)) + /* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */ #define _DKL_PCS_DW5 0x14 #define DKL_PCS_DW5(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ From d3f6bacfca86f6cf6bf85be1e8b54083d68d8195 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Thu, 20 Oct 2022 13:03:08 +0200 Subject: [PATCH 083/239] drm/i915: stop abusing swiotlb_max_segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit swiotlb_max_segment used to return either the maximum size that swiotlb could bounce, or for Xen PV PAGE_SIZE even if swiotlb could bounce buffer larger mappings. This made i915 on Xen PV work as it bypasses the coherency aspect of the DMA API and can't cope with bounce buffering and this avoided bounce buffering for the Xen/PV case. So instead of adding this hack back, check for Xen/PV directly in i915 for the Xen case and otherwise use the proper DMA API helper to query the maximum mapping size. Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Fixes: a2daa27c0c61 ("swiotlb: simplify swiotlb_max_segment") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Robert Beckett Signed-off-by: Christoph Hellwig [hch: added the Xen hack, rewrote the changelog] Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221020110308.1582518-1-hch@lst.de (cherry picked from commit 78a07fe777c42800bd1adaec12abe5dcee43919e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 +++-------- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 34 ++++++++++++-------- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15fe..629acb403a2c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, get_order(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index f42ca1179f373..11125c32dd35d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4f861782c3e85..a4aa9500fa179 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -538,7 +538,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index d4398948f0162..f34e01a7fefb9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; unsigned int sg_page_sizes; struct page **pvec; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 9ddb3e743a3e5..b0a1db44f8950 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -9,7 +9,8 @@ #include #include -#include +#include +#include #include "i915_gem.h" @@ -127,19 +128,26 @@ static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg) return page_sizes; } -static inline unsigned int i915_sg_segment_size(void) +static inline unsigned int i915_sg_segment_size(struct device *dev) { - unsigned int size = swiotlb_max_segment(); - - if (size == 0) - size = UINT_MAX; - - size = rounddown(size, PAGE_SIZE); - /* swiotlb_max_segment_size can return 1 byte when it means one page. */ - if (size < PAGE_SIZE) - size = PAGE_SIZE; - - return size; + size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev)); + + /* + * For Xen PV guests pages aren't contiguous in DMA (machine) address + * space. The DMA API takes care of that both in dma_alloc_* (by + * calling into the hypervisor to make the pages contiguous) and in + * dma_map_* (by bounce buffering). But i915 abuses ignores the + * coherency aspects of the DMA API and thus can't cope with bounce + * buffering actually happening, so add a hack here to force small + * allocations and mappings when running in PV mode on Xen. + * + * Note this will still break if bounce buffering is required for other + * reasons, like confidential computing hypervisors or PCIe root ports + * with addressing limitations. + */ + if (xen_pv_domain()) + max = PAGE_SIZE; + return round_down(max, PAGE_SIZE); } bool i915_sg_trim(struct sg_table *orig_st); From 3e206b6aa6df7eed4297577e0cf8403169b800a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:27 +0300 Subject: [PATCH 084/239] drm/i915/sdvo: Filter out invalid outputs more sensibly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We try to filter out the corresponding xxx1 output if the xxx0 output is not present. But the way that is being done is pretty awkward. Make it less so. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit cc1e66394daaa7e9f005e2487a84e34a39f9308b) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index f5b744bef18ff..30d3778c61366 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2926,16 +2926,33 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) return false; } +static u16 intel_sdvo_filter_output_flags(u16 flags) +{ + flags &= SDVO_OUTPUT_MASK; + + /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + if (!(flags & SDVO_OUTPUT_TMDS0)) + flags &= ~SDVO_OUTPUT_TMDS1; + + if (!(flags & SDVO_OUTPUT_RGB0)) + flags &= ~SDVO_OUTPUT_RGB1; + + if (!(flags & SDVO_OUTPUT_LVDS0)) + flags &= ~SDVO_OUTPUT_LVDS1; + + return flags; +} + static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { - /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + flags = intel_sdvo_filter_output_flags(flags); if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_TMDS_MASK) == SDVO_TMDS_MASK) + if (flags & SDVO_OUTPUT_TMDS1) if (!intel_sdvo_dvi_init(intel_sdvo, 1)) return false; @@ -2956,7 +2973,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_analog_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_RGB_MASK) == SDVO_RGB_MASK) + if (flags & SDVO_OUTPUT_RGB1) if (!intel_sdvo_analog_init(intel_sdvo, 1)) return false; @@ -2964,11 +2981,11 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_lvds_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_LVDS_MASK) == SDVO_LVDS_MASK) + if (flags & SDVO_OUTPUT_LVDS1) if (!intel_sdvo_lvds_init(intel_sdvo, 1)) return false; - if ((flags & SDVO_OUTPUT_MASK) == 0) { + if (flags == 0) { unsigned char bytes[2]; intel_sdvo->controlled_output = 0; From e79762512120f11c51317570519a1553c70805d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:28 +0300 Subject: [PATCH 085/239] drm/i915/sdvo: Setup DDC fully before output init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call intel_sdvo_select_ddc_bus() before initializing any of the outputs. And before that is functional (assuming no VBT) we have to set up the controlled_outputs thing. Otherwise DDC won't be functional during the output init but LVDS really needs it for the fixed mode setup. Note that the whole multi output support still looks very bogus, and more work will be needed to make it correct. But for now this should at least fix the LVDS EDID fixed mode setup. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7301 Fixes: aa2b88074a56 ("drm/i915/sdvo: Fix multi function encoder stuff") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 64b7b557dc8a96d9cfed6aedbf81de2df80c025d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 31 +++++++++-------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 30d3778c61366..8046d02a8ad01 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2747,13 +2747,10 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) if (!intel_sdvo_connector) return false; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS1; - } intel_connector = &intel_sdvo_connector->base; connector = &intel_connector->base; @@ -2808,7 +2805,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; - intel_sdvo->controlled_output |= type; intel_sdvo_connector->output_flag = type; if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { @@ -2849,13 +2845,10 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_DAC; connector->connector_type = DRM_MODE_CONNECTOR_VGA; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2885,13 +2878,10 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_LVDS; connector->connector_type = DRM_MODE_CONNECTOR_LVDS; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2946,8 +2936,14 @@ static u16 intel_sdvo_filter_output_flags(u16 flags) static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { + struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); + flags = intel_sdvo_filter_output_flags(flags); + intel_sdvo->controlled_output = flags; + + intel_sdvo_select_ddc_bus(i915, intel_sdvo); + if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; @@ -2988,7 +2984,6 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (flags == 0) { unsigned char bytes[2]; - intel_sdvo->controlled_output = 0; memcpy(bytes, &intel_sdvo->caps.output_flags, 2); DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(intel_sdvo), @@ -3400,8 +3395,6 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, */ intel_sdvo->base.cloneable = 0; - intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo); - /* Set the input timing to the screen. Assume always input 0. */ if (!intel_sdvo_set_target_input(intel_sdvo)) goto err_output; From 224e858f215a3d6304f95a92357a1753475ca9cf Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:29 +0800 Subject: [PATCH 086/239] ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module UBLK_F_URING_CMD_COMP_IN_TASK needs to be set and returned to userspace if ublk driver is built as module, otherwise userspace may get wrong flags shown. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5afce6ffaadfa..6b2f214f0d5c6 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1658,6 +1658,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) */ ub->dev_info.flags &= UBLK_F_ALL; + if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK)) + ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK; + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; From d57c2c6c1145148bb23d68db73de0b52d482d4ba Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:30 +0800 Subject: [PATCH 087/239] ublk_drv: comment on ublk_driver entry of Kconfig Add help info for choosing to build ublk_drv as module or builtin. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index db1b4b202646e..a41145d52de94 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -408,6 +408,12 @@ config BLK_DEV_UBLK definition isn't finalized yet, and might change according to future requirement, so mark is as experimental now. + Say Y if you want to get better performance because task_work_add() + can be used in IO path for replacing io_uring cmd, which will become + shared between IO tasks and ubq daemon, meantime task_work_add() can + can handle batch more effectively, but task_work_add() isn't exported + for module, so ublk has to be built to kernel. + source "drivers/block/rnbd/Kconfig" endif # BLK_DEV From 3ab6e94ca539242247d4f00414a1bde584d001ed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:31 +0800 Subject: [PATCH 088/239] ublk_drv: avoid to touch io_uring cmd in blk_mq io path io_uring cmd is supposed to be used in ubq daemon context mainly, and we should try to avoid to touch it in ublk io submission context, otherwise this data could become shared between the two contexts, and performance is hurt. So link request into one per-queue list, and use same batching policy of io_uring command, just avoid to touch ucmd in blk-mq io context. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 83 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6b2f214f0d5c6..3a59271dafe4d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -57,11 +57,14 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - struct callback_head work; + union { + struct callback_head work; + struct llist_node node; + }; }; struct ublk_uring_cmd_pdu { - struct request *req; + struct ublk_queue *ubq; }; /* @@ -119,6 +122,8 @@ struct ublk_queue { struct task_struct *ubq_daemon; char *io_cmd_buf; + struct llist_head io_cmds; + unsigned long io_addr; /* mapped vm address */ unsigned int max_io_sz; bool force_abort; @@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req) static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_queue *ubq = pdu->ubq; + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data; - __ublk_rq_task_work(pdu->req); + llist_for_each_entry(data, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -777,17 +786,50 @@ static void ublk_rq_task_work_fn(struct callback_head *work) __ublk_rq_task_work(req); } +static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +{ + struct ublk_io *io = &ubq->ios[rq->tag]; + + /* + * If the check pass, we know that this is a re-issued request aborted + * previously in monitor_work because the ubq_daemon(cmd's task) is + * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore + * because this ioucmd's io_uring context may be freed now if no inflight + * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work. + * + * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing + * the tag). Then the request is re-started(allocating the tag) and we are here. + * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED + * guarantees that here is a re-issued request aborted previously. + */ + if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data; + + llist_for_each_entry(data, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + } else { + struct io_uring_cmd *cmd = io->cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + + pdu->ubq = ubq; + io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + } +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ublk_queue *ubq = hctx->driver_data; struct request *rq = bd->rq; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); blk_status_t res; /* fill iod to slot in io cmd buffer */ res = ublk_setup_iod(ubq, rq); if (unlikely(res != BLK_STS_OK)) return BLK_STS_IOERR; + /* With recovery feature enabled, force_abort is set in * ublk_stop_dev() before calling del_gendisk(). We have to * abort all requeued and new rqs here to let del_gendisk() @@ -809,36 +851,15 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, } if (ublk_can_use_task_work(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); enum task_work_notify_mode notify_mode = bd->last ? TWA_SIGNAL_NO_IPI : TWA_NONE; if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) goto fail; } else { - struct ublk_io *io = &ubq->ios[rq->tag]; - struct io_uring_cmd *cmd = io->cmd; - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - - /* - * If the check pass, we know that this is a re-issued request aborted - * previously in monitor_work because the ubq_daemon(cmd's task) is - * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore - * because this ioucmd's io_uring context may be freed now if no inflight - * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work. - * - * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing - * the tag). Then the request is re-started(allocating the tag) and we are here. - * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED - * guarantees that here is a re-issued request aborted previously. - */ - if ((io->flags & UBLK_IO_FLAG_ABORTED)) - goto fail; - - pdu->req = rq; - io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + if (llist_add(&data->node, &ubq->io_cmds)) + ublk_submit_cmd(ubq, rq); } - return BLK_STS_OK; } @@ -1168,17 +1189,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, { struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); if (ublk_can_use_task_work(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - /* should not fail since we call it just in ubq->ubq_daemon */ task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI); } else { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - pdu->req = req; - io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + if (llist_add(&data->node, &ubq->io_cmds)) { + pdu->ubq = ubq; + io_uring_cmd_complete_in_task(cmd, + ublk_rq_task_work_cb); + } } } From fee32f312405726eec6b35b5740c48acda0315e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:32 +0800 Subject: [PATCH 089/239] ublk_drv: add ublk_queue_cmd() for cleanup Add helper of ublk_queue_cmd() so that both ublk_queue_rq() and ublk_handle_need_get_data() can reuse this helper. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 47 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 3a59271dafe4d..f96cb01e9604d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -817,12 +817,28 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, + bool last) +{ + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + + if (ublk_can_use_task_work(ubq)) { + enum task_work_notify_mode notify_mode = last ? + TWA_SIGNAL_NO_IPI : TWA_NONE; + + if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) + __ublk_abort_rq(ubq, rq); + } else { + if (llist_add(&data->node, &ubq->io_cmds)) + ublk_submit_cmd(ubq, rq); + } +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ublk_queue *ubq = hctx->driver_data; struct request *rq = bd->rq; - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); blk_status_t res; /* fill iod to slot in io cmd buffer */ @@ -845,21 +861,12 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (unlikely(ubq_daemon_is_dying(ubq))) { - fail: __ublk_abort_rq(ubq, rq); return BLK_STS_OK; } - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = bd->last ? - TWA_SIGNAL_NO_IPI : TWA_NONE; + ublk_queue_cmd(ubq, rq, bd->last); - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - goto fail; - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } return BLK_STS_OK; } @@ -1185,24 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag, struct io_uring_cmd *cmd) + int tag) { struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - if (ublk_can_use_task_work(ubq)) { - /* should not fail since we call it just in ubq->ubq_daemon */ - task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI); - } else { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - - if (llist_add(&data->node, &ubq->io_cmds)) { - pdu->ubq = ubq; - io_uring_cmd_complete_in_task(cmd, - ublk_rq_task_work_cb); - } - } + ublk_queue_cmd(ubq, req, true); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -1290,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) io->addr = ub_cmd->addr; io->cmd = cmd; io->flags |= UBLK_IO_FLAG_ACTIVE; - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd); + ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); break; default: goto out; From fa81cbafbf5764ad5053512152345fab37a1fe18 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Sat, 29 Oct 2022 15:13:55 +0800 Subject: [PATCH 090/239] block: Fix possible memory leak for rq_wb on add_disk failure kmemleak reported memory leaks in device_add_disk(): kmemleak: 3 new suspected memory leaks unreferenced object 0xffff88800f420800 (size 512): comm "modprobe", pid 4275, jiffies 4295639067 (age 223.512s) hex dump (first 32 bytes): 04 00 00 00 08 00 00 00 01 00 00 00 00 00 00 00 ................ 00 e1 f5 05 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d3662699>] kmalloc_trace+0x26/0x60 [<00000000edc7aadc>] wbt_init+0x50/0x6f0 [<0000000069601d16>] wbt_enable_default+0x157/0x1c0 [<0000000028fc393f>] blk_register_queue+0x2a4/0x420 [<000000007345a042>] device_add_disk+0x6fd/0xe40 [<0000000060e6aab0>] nbd_dev_add+0x828/0xbf0 [nbd] ... It is because the memory allocated in wbt_enable_default() is not released in device_add_disk() error path. Normally, these memory are freed in: del_gendisk() rq_qos_exit() rqos->ops->exit(rqos); wbt_exit() So rq_qos_exit() is called to free the rq_wb memory for wbt_init(). However in the error path of device_add_disk(), only blk_unregister_queue() is called and make rq_wb memory leaked. Add rq_qos_exit() to the error path to fix it. Fixes: 83cbce957446 ("block: add error handling for device_add_disk / add_disk") Signed-off-by: Chen Zhongjin Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221029071355.35462-1-chenzhongjin@huawei.com Signed-off-by: Jens Axboe --- block/genhd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/genhd.c b/block/genhd.c index fee90eb98b4a5..0f9769db2de83 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, bdi_unregister(disk->bdi); out_unregister_queue: blk_unregister_queue(disk); + rq_qos_exit(disk->queue); out_put_slave_dir: kobject_put(disk->slave_dir); out_put_holder_dir: From d372ec94a018c3a19dad71e2ee3478126394d9fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 27 Sep 2022 21:06:13 +0300 Subject: [PATCH 091/239] drm/i915: Simplify intel_panel_add_edid_alt_fixed_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a5810f551d0a ("drm/i915: Allow more varied alternate fixed modes for panels") intel_panel_add_edid_alt_fixed_modes() no longer considers vrr vs. drrs separately. So no reason to pass them as separate parameters either. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220927180615.25476-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit eb89e83c152b122a94e79527d63cb7c79823c37e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_lvds.c | 3 +-- drivers/gpu/drm/i915/display/intel_panel.c | 4 ++-- drivers/gpu/drm/i915/display/intel_panel.h | 2 +- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 47419d162f302..2b5bc95a8b0df 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5276,7 +5276,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, encoder->devdata, IS_ERR(edid) ? NULL : edid); intel_panel_add_edid_fixed_modes(intel_connector, - intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE, + intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE || intel_vrr_is_capable(intel_connector)); /* MSO requires information from the EDID */ diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 9aa38e8141b52..e5352239b2a2f 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -972,8 +972,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) /* Try EDID first */ intel_panel_add_edid_fixed_modes(intel_connector, - intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE, - false); + intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE); /* Failed to get EDID, what about VBT? */ if (!intel_panel_preferred_fixed_mode(intel_connector)) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index a3a3f9fe4342f..41cec9dc42231 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -254,10 +254,10 @@ static void intel_panel_destroy_probed_modes(struct intel_connector *connector) } void intel_panel_add_edid_fixed_modes(struct intel_connector *connector, - bool has_drrs, bool has_vrr) + bool use_alt_fixed_modes) { intel_panel_add_edid_preferred_mode(connector); - if (intel_panel_preferred_fixed_mode(connector) && (has_drrs || has_vrr)) + if (intel_panel_preferred_fixed_mode(connector) && use_alt_fixed_modes) intel_panel_add_edid_alt_fixed_modes(connector); intel_panel_destroy_probed_modes(connector); } diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h index eff3ffd3d0825..5c5b5b7f95b6c 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.h +++ b/drivers/gpu/drm/i915/display/intel_panel.h @@ -44,7 +44,7 @@ int intel_panel_fitting(struct intel_crtc_state *crtc_state, int intel_panel_compute_config(struct intel_connector *connector, struct drm_display_mode *adjusted_mode); void intel_panel_add_edid_fixed_modes(struct intel_connector *connector, - bool has_drrs, bool has_vrr); + bool use_alt_fixed_modes); void intel_panel_add_vbt_lfp_fixed_mode(struct intel_connector *connector); void intel_panel_add_vbt_sdvo_fixed_mode(struct intel_connector *connector); void intel_panel_add_encoder_fixed_mode(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 8046d02a8ad01..8ee7b05ab7333 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2901,7 +2901,7 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) if (!intel_panel_preferred_fixed_mode(intel_connector)) { intel_ddc_get_modes(connector, &intel_sdvo->ddc); - intel_panel_add_edid_fixed_modes(intel_connector, false, false); + intel_panel_add_edid_fixed_modes(intel_connector, false); } intel_panel_init(intel_connector); From 12caf46cf4fc92b1c3884cb363ace2e12732fd2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:29 +0300 Subject: [PATCH 092/239] drm/i915/sdvo: Grab mode_config.mutex during LVDS init to avoid WARNs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_mode_probed_add() is unhappy about being called w/o mode_config.mutex. Grab it during LVDS fixed mode setup to silence the WARNs. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7301 Fixes: aa2b88074a56 ("drm/i915/sdvo: Fix multi function encoder stuff") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit a3cd4f447281c56377de2ee109327400eb00668d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 8ee7b05ab7333..774c1dc31a521 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2900,8 +2900,12 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) intel_panel_add_vbt_sdvo_fixed_mode(intel_connector); if (!intel_panel_preferred_fixed_mode(intel_connector)) { + mutex_lock(&i915->drm.mode_config.mutex); + intel_ddc_get_modes(connector, &intel_sdvo->ddc); intel_panel_add_edid_fixed_modes(intel_connector, false); + + mutex_unlock(&i915->drm.mode_config.mutex); } intel_panel_init(intel_connector); From 943f45b9399ed8b2b5190cbc797995edaa97f58f Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Mon, 31 Oct 2022 03:12:42 +0000 Subject: [PATCH 093/239] blk-mq: Fix kmemleak in blk_mq_init_allocated_queue There is a kmemleak caused by modprobe null_blk.ko unreferenced object 0xffff8881acb1f000 (size 1024): comm "modprobe", pid 836, jiffies 4294971190 (age 27.068s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 00 53 99 9e ff ff ff ff .........S...... backtrace: [<000000004a10c249>] kmalloc_node_trace+0x22/0x60 [<00000000648f7950>] blk_mq_alloc_and_init_hctx+0x289/0x350 [<00000000af06de0e>] blk_mq_realloc_hw_ctxs+0x2fe/0x3d0 [<00000000e00c1872>] blk_mq_init_allocated_queue+0x48c/0x1440 [<00000000d16b4e68>] __blk_mq_alloc_disk+0xc8/0x1c0 [<00000000d10c98c3>] 0xffffffffc450d69d [<00000000b9299f48>] 0xffffffffc4538392 [<0000000061c39ed6>] do_one_initcall+0xd0/0x4f0 [<00000000b389383b>] do_init_module+0x1a4/0x680 [<0000000087cf3542>] load_module+0x6249/0x7110 [<00000000beba61b8>] __do_sys_finit_module+0x140/0x200 [<00000000fdcfff51>] do_syscall_64+0x35/0x80 [<000000003c0f1f71>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 That is because q->ma_ops is set to NULL before blk_release_queue is called. blk_mq_init_queue_data blk_mq_init_allocated_queue blk_mq_realloc_hw_ctxs for (i = 0; i < set->nr_hw_queues; i++) { old_hctx = xa_load(&q->hctx_table, i); if (!blk_mq_alloc_and_init_hctx(.., i, ..)) [1] if (!old_hctx) break; xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); [2] if (!q->nr_hw_queues) [3] goto err_hctxs; err_exit: q->mq_ops = NULL; [4] blk_put_queue blk_release_queue if (queue_is_mq(q)) [5] blk_mq_release(q); [1]: blk_mq_alloc_and_init_hctx failed at i != 0. [2]: The hctxs allocated by [1] are moved to q->unused_hctx_list and will be cleaned up in blk_mq_release. [3]: q->nr_hw_queues is 0. [4]: Set q->mq_ops to NULL. [5]: queue_is_mq returns false due to [4]. And blk_mq_release will not be called. The hctxs in q->unused_hctx_list are leaked. To fix it, call blk_release_queue in exception path. Fixes: 2f8f1336a48b ("blk-mq: always free hctx after request queue is freed") Signed-off-by: Yuan Can Signed-off-by: Chen Jun Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20221031031242.94107-1-chenjun102@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 75c8296b6feb3..21cc7c2da0f96 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4193,9 +4193,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, return 0; err_hctxs: - xa_destroy(&q->hctx_table); - q->nr_hw_queues = 0; - blk_mq_sysfs_deinit(q); + blk_mq_release(q); err_poll: blk_stat_free_callback(q->poll_cb); q->poll_cb = NULL; From 063b1f21cc9be07291a1f5e227436f353c6d1695 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2022 08:35:28 +0100 Subject: [PATCH 094/239] btrfs: fix a memory allocation failure test in btrfs_submit_direct After allocation 'dip' is tested instead of 'dip->csums'. Fix it. Fixes: 642c5d34da53 ("btrfs: allocate the btrfs_dio_private as part of the iomap dio bio") CC: stable@vger.kernel.org # 5.19+ Reviewed-by: Nikolay Borisov Signed-off-by: Christophe JAILLET Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 45ebef8d3ea8d..f0ebc17aeb672 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, */ status = BLK_STS_RESOURCE; dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS); - if (!dip) + if (!dip->csums) goto out_err; status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums); From 47ba8cc7b4f82c927cec3ad7c7392e4c45c81c56 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 24 Oct 2022 10:11:02 -0700 Subject: [PATCH 095/239] xfs: fix incorrect return type for fsdax fault handlers The kernel robot complained about this: >> fs/xfs/xfs_file.c:1266:31: sparse: sparse: incorrect type in return expression (different base types) @@ expected int @@ got restricted vm_fault_t @@ fs/xfs/xfs_file.c:1266:31: sparse: expected int fs/xfs/xfs_file.c:1266:31: sparse: got restricted vm_fault_t fs/xfs/xfs_file.c:1314:21: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted vm_fault_t [usertype] ret @@ got int @@ fs/xfs/xfs_file.c:1314:21: sparse: expected restricted vm_fault_t [usertype] ret fs/xfs/xfs_file.c:1314:21: sparse: got int Fix the incorrect return type for these two functions. While we're at it, make the !fsdax version return VM_FAULT_SIGBUS because a zero return value will cause some callers to try to lock vmf->page, which we never set here. Fixes: ea6c49b784f0 ("xfs: support CoW in fsdax mode") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c6c80265c0b25..e462d39c840e6 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1261,7 +1261,7 @@ xfs_file_llseek( } #ifdef CONFIG_FS_DAX -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, @@ -1274,14 +1274,15 @@ xfs_dax_fault( &xfs_read_iomap_ops); } #else -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, bool write_fault, pfn_t *pfn) { - return 0; + ASSERT(0); + return VM_FAULT_SIGBUS; } #endif From 8184620ae21213d51eaf2e0bd4186baacb928172 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 28 Oct 2022 13:15:35 +0100 Subject: [PATCH 096/239] btrfs: fix lost file sync on direct IO write with nowait and dsync iocb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When doing a direct IO write using a iocb with nowait and dsync set, we end up not syncing the file once the write completes. This is because we tell iomap to not call generic_write_sync(), which would result in calling btrfs_sync_file(), in order to avoid a deadlock since iomap can call it while we are holding the inode's lock and btrfs_sync_file() needs to acquire the inode's lock. The deadlock happens only if the write happens synchronously, when iomap_dio_rw() calls iomap_dio_complete() before it returns. Instead we do the sync ourselves at btrfs_do_write_iter(). For a nowait write however we can end up not doing the sync ourselves at at btrfs_do_write_iter() because the write could have been queued, and therefore we get -EIOCBQUEUED returned from iomap in such case. That makes us skip the sync call at btrfs_do_write_iter(), as we don't do it for any error returned from btrfs_direct_write(). We can't simply do the call even if -EIOCBQUEUED is returned, since that would block the task waiting for IO, both for the data since there are bios still in progress as well as potentially blocking when joining a log transaction and when syncing the log (writing log trees, super blocks, etc). So let iomap do the sync call itself and in order to avoid deadlocks for the case of synchronous writes (without nowait), use __iomap_dio_rw() and have ourselves call iomap_dio_complete() after unlocking the inode. A test case will later be sent for fstests, after this is fixed in Linus' tree. Fixes: 51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO reads and writes") Reported-by: Марк Коренберг Link: https://lore.kernel.org/linux-btrfs/CAEmTpZGRKbzc16fWPvxbr6AfFsQoLmz-Lcg-7OgJOZDboJ+SGQ@mail.gmail.com/ CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++++- fs/btrfs/file.c | 22 ++++++++++++++++------ fs/btrfs/inode.c | 14 +++++++++++--- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 727595eee9732..f677b49df8ae0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded); -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); extern const struct dentry_operations btrfs_dentry_operations; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 176b432035aea..d55ad46384d17 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1765,6 +1765,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) loff_t endbyte; ssize_t err; unsigned int ilock_flags = 0; + struct iomap_dio *dio; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; @@ -1825,11 +1826,22 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ -again: from->nofault = true; - err = btrfs_dio_rw(iocb, from, written); + dio = btrfs_dio_write(iocb, from, written); from->nofault = false; + /* + * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync + * iocb, and that needs to lock the inode. So unlock it before calling + * iomap_dio_complete() to avoid a deadlock. + */ + btrfs_inode_unlock(inode, ilock_flags); + + if (IS_ERR_OR_NULL(dio)) + err = PTR_ERR_OR_ZERO(dio); + else + err = iomap_dio_complete(dio); + /* No increment (+=) because iomap returns a cumulative value. */ if (err > 0) written = err; @@ -1855,12 +1867,10 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto again; + goto relock; } } - btrfs_inode_unlock(inode, ilock_flags); - /* * If 'err' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. @@ -4035,7 +4045,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) */ pagefault_disable(); to->nofault = true; - ret = btrfs_dio_rw(iocb, to, read); + ret = btrfs_dio_read(iocb, to, read); to->nofault = false; pagefault_enable(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f0ebc17aeb672..d70f85b73169b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = { .bio_set = &btrfs_dio_bioset, }; -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { struct btrfs_dio_data data; return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC, - &data, done_before); + IOMAP_DIO_PARTIAL, &data, done_before); +} + +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before) +{ + struct btrfs_dio_data data; + + return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + IOMAP_DIO_PARTIAL, &data, done_before); } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, From 59da7ff49d67a1b63b1b81c7f53dcb6a84cdad2b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:08:11 -0700 Subject: [PATCH 097/239] xfs: fix validation in attr log item recovery Before we start fixing all the complaints about memcpy'ing log items around, let's fix some inadequate validation in the xattr log item recovery code and get rid of the (now trivial) copy_format function. Signed-off-by: Darrick J. Wong Reviewed-by: Kees Cook Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_attr_item.c | 54 ++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index cf5ce607dc051..ee8f678a10a17 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -245,28 +245,6 @@ xfs_attri_init( return attrip; } -/* - * Copy an attr format buffer from the given buf, and into the destination attr - * format structure. - */ -STATIC int -xfs_attri_copy_format( - struct xfs_log_iovec *buf, - struct xfs_attri_log_format *dst_attr_fmt) -{ - struct xfs_attri_log_format *src_attr_fmt = buf->i_addr; - size_t len; - - len = sizeof(struct xfs_attri_log_format); - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len); - return 0; -} - static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attrd_log_item, attrd_item); @@ -731,24 +709,44 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_value = NULL; const void *attr_name; - int error; + size_t len; attri_formatp = item->ri_buf[0].i_addr; attr_name = item->ri_buf[1].i_addr; /* Validate xfs_attri_log_format before the large memory allocation */ + len = sizeof(struct xfs_attri_log_format); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + if (!xfs_attri_validate(mp, attri_formatp)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } + /* Validate the attr name */ + if (item->ri_buf[1].i_len != + xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } - if (attri_formatp->alfi_value_len) + /* Validate the attr value, if present */ + if (attri_formatp->alfi_value_len != 0) { + if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + attr_value = item->ri_buf[2].i_addr; + } /* * Memory alloc failure will cause replay to abort. We attach the @@ -760,9 +758,7 @@ xlog_recover_attri_commit_pass2( attri_formatp->alfi_value_len); attrip = xfs_attri_init(mp, nv); - error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format); - if (error) - goto out; + memcpy(&attrip->attri_format, attri_formatp, len); /* * The ATTRI has two references. One for the ATTRD and one for ATTRI to @@ -774,10 +770,6 @@ xlog_recover_attri_commit_pass2( xfs_attri_release(attrip); xfs_attri_log_nameval_put(nv); return 0; -out: - xfs_attri_item_free(attrip); - xfs_attri_log_nameval_put(nv); - return error; } /* From a38ebce1da271f480e47c3def4f810c6106b74a1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:17:41 -0700 Subject: [PATCH 098/239] xfs: fix memcpy fortify errors in BUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Unfortunately, it doesn't handle flex arrays correctly: ------------[ cut here ]------------ memcpy: detected field-spanning write (size 48) of single field "dst_bui_fmt" at fs/xfs/xfs_bmap_item.c:628 (size 16) Fix this by refactoring the xfs_bui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_bmap_item.c | 46 ++++++++++++++++++++---------------------- fs/xfs/xfs_ondisk.h | 5 +++++ 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 51f66e9824846..a1da6205252ba 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_relog = xfs_bui_item_relog, }; -/* - * Copy an BUI format buffer from the given buf, and into the destination - * BUI format structure. The BUI/BUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_bui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_bui_log_format *dst_bui_fmt) + struct xfs_bui_log_format *dst, + const struct xfs_bui_log_format *src) { - struct xfs_bui_log_format *src_bui_fmt; - uint len; + unsigned int i; - src_bui_fmt = buf->i_addr; - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents)); - if (buf->i_len == len) { - memcpy(dst_bui_fmt, src_bui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->bui_nextents; i++) + memcpy(&dst->bui_extents[i], &src->bui_extents[i], + sizeof(struct xfs_map_extent)); } /* @@ -646,23 +636,31 @@ xlog_recover_bui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_bui_log_item *buip; struct xfs_bui_log_format *bui_formatp; + size_t len; bui_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } - buip = xfs_bui_init(mp); - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); - if (error) { - xfs_bui_item_free(buip); - return error; + + len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + buip = xfs_bui_init(mp); + xfs_bui_copy_format(&buip->bui_format, bui_formatp); atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); /* * Insert the intent into the AIL directly and drop one reference so diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 758702b9495ff..56917e2363702 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,6 +134,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); /* * The v5 superblock format extended several v4 header structures with From a38935c03c7914a6ab22eefb750b259868ed5a4b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:23:16 -0700 Subject: [PATCH 099/239] xfs: fix memcpy fortify errors in CUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. Refactor the xfs_cui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_ondisk.h | 4 ++++ fs/xfs/xfs_refcount_item.c | 45 ++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 56917e2363702..e20d2844b0c53 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -136,9 +136,13 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7e97bf19793df..24cf4c64ebaab 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -622,28 +622,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_relog = xfs_cui_item_relog, }; -/* - * Copy an CUI format buffer from the given buf, and into the destination - * CUI format structure. The CUI/CUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_cui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_cui_log_format *dst_cui_fmt) + struct xfs_cui_log_format *dst, + const struct xfs_cui_log_format *src) { - struct xfs_cui_log_format *src_cui_fmt; - uint len; + unsigned int i; - src_cui_fmt = buf->i_addr; - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); - if (buf->i_len == len) { - memcpy(dst_cui_fmt, src_cui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->cui_nextents; i++) + memcpy(&dst->cui_extents[i], &src->cui_extents[i], + sizeof(struct xfs_phys_extent)); } /* @@ -660,19 +650,26 @@ xlog_recover_cui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_cui_log_item *cuip; struct xfs_cui_log_format *cui_formatp; + size_t len; cui_formatp = item->ri_buf[0].i_addr; - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); - if (error) { - xfs_cui_item_free(cuip); - return error; + if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + xfs_cui_copy_format(&cuip->cui_format, cui_formatp); atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); /* * Insert the intent into the AIL directly and drop one reference so From b45ca961e94673df83ab1900802afe82776966e6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:26:36 -0700 Subject: [PATCH 100/239] xfs: fix memcpy fortify errors in RUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. Refactor the xfs_rui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_ondisk.h | 3 +++ fs/xfs/xfs_rmap_item.c | 58 ++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index e20d2844b0c53..19c1df00b48e8 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -138,11 +138,14 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index fef92e02f3bb6..27047e73f5829 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -155,31 +155,6 @@ xfs_rui_init( return ruip; } -/* - * Copy an RUI format buffer from the given buf, and into the destination - * RUI format structure. The RUI/RUD items were designed not to need any - * special alignment handling. - */ -STATIC int -xfs_rui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_rui_log_format *dst_rui_fmt) -{ - struct xfs_rui_log_format *src_rui_fmt; - uint len; - - src_rui_fmt = buf->i_addr; - len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); - - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy(dst_rui_fmt, src_rui_fmt, len); - return 0; -} - static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_rud_log_item, rud_item); @@ -652,6 +627,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_relog = xfs_rui_item_relog, }; +static inline void +xfs_rui_copy_format( + struct xfs_rui_log_format *dst, + const struct xfs_rui_log_format *src) +{ + unsigned int i; + + memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents)); + + for (i = 0; i < src->rui_nextents; i++) + memcpy(&dst->rui_extents[i], &src->rui_extents[i], + sizeof(struct xfs_map_extent)); +} + /* * This routine is called to create an in-core extent rmap update * item from the rui format structure which was logged on disk. @@ -666,19 +655,26 @@ xlog_recover_rui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_rui_log_item *ruip; struct xfs_rui_log_format *rui_formatp; + size_t len; rui_formatp = item->ri_buf[0].i_addr; - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); - if (error) { - xfs_rui_item_free(ruip); - return error; + if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); + xfs_rui_copy_format(&ruip->rui_format, rui_formatp); atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); /* * Insert the intent into the AIL directly and drop one reference so From f850995f60e49818093ef5e477cdb0ff2c11a0a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 18:18:21 -0700 Subject: [PATCH 101/239] xfs: make sure aglen never goes negative in xfs_refcount_adjust_extents Prior to calling xfs_refcount_adjust_extents, we trimmed agbno/aglen such that the end of the range would not be in the middle of a refcount record. If this is no longer the case, something is seriously wrong with the btree. Bail out with a corruption error. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 64b910caafaad..831353ba96dc8 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -986,15 +986,29 @@ xfs_refcount_adjust_extents( (*agbno) += tmp.rc_blockcount; (*aglen) -= tmp.rc_blockcount; + /* Stop if there's nothing left to modify */ + if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) + break; + + /* Move the cursor to the start of ext. */ error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); if (error) goto out_error; } - /* Stop if there's nothing left to modify */ - if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) - break; + /* + * A previous step trimmed agbno/aglen such that the end of the + * range would not be in the middle of the record. If this is + * no longer the case, something is seriously wrong with the + * btree. Make sure we never feed the synthesized record into + * the processing loop below. + */ + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || + XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { + error = -EFSCORRUPTED; + goto out_error; + } /* * Adjust the reference count and either update the tree From 03a7485cd701e1c08baadcf39d9592d83715e224 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:39:59 -0700 Subject: [PATCH 102/239] xfs: fix memcpy fortify errors in EFI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. An extra difficulty here is that the ef[id]_extents arrays are declared as single-element arrays. This is not the convention for flex arrays in the modern kernel, and it causes all manner of problems with static checking tools, since they often cannot tell the difference between a single element array and a flex array. So for starters, change those array[1] declarations to array[] declarations to signal that they are proper flex arrays and adjust all the "size-1" expressions to fit the new declaration style. Next, refactor the xfs_efi_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Kees Cook Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 12 ++++++------ fs/xfs/xfs_extfree_item.c | 31 +++++++++++++++++++++---------- fs/xfs/xfs_ondisk.h | 11 +++++++---- fs/xfs/xfs_super.c | 4 ++-- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index b351b9dc65618..2f41fa8477c9d 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -613,7 +613,7 @@ typedef struct xfs_efi_log_format { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_t efi_extents[1]; /* array of extents to free */ + xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; typedef struct xfs_efi_log_format_32 { @@ -621,7 +621,7 @@ typedef struct xfs_efi_log_format_32 { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_32_t efi_extents[1]; /* array of extents to free */ + xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; typedef struct xfs_efi_log_format_64 { @@ -629,7 +629,7 @@ typedef struct xfs_efi_log_format_64 { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_64_t efi_extents[1]; /* array of extents to free */ + xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; /* @@ -642,7 +642,7 @@ typedef struct xfs_efd_log_format { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_t efd_extents[1]; /* array of extents freed */ + xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; typedef struct xfs_efd_log_format_32 { @@ -650,7 +650,7 @@ typedef struct xfs_efd_log_format_32 { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_32_t efd_extents[1]; /* array of extents freed */ + xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; typedef struct xfs_efd_log_format_64 { @@ -658,7 +658,7 @@ typedef struct xfs_efd_log_format_64 { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_64_t efd_extents[1]; /* array of extents freed */ + xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 27ccfcd82f042..466cc5c5cd33f 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -76,7 +76,7 @@ xfs_efi_item_sizeof( struct xfs_efi_log_item *efip) { return sizeof(struct xfs_efi_log_format) + - (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); + efip->efi_format.efi_nextents * sizeof(xfs_extent_t); } STATIC void @@ -160,7 +160,7 @@ xfs_efi_init( ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { size = (uint)(sizeof(struct xfs_efi_log_item) + - ((nextents - 1) * sizeof(xfs_extent_t))); + (nextents * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { efip = kmem_cache_zalloc(xfs_efi_cache, @@ -189,14 +189,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; uint len = sizeof(xfs_efi_log_format_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_t); uint len32 = sizeof(xfs_efi_log_format_32_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_32_t); uint len64 = sizeof(xfs_efi_log_format_64_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_64_t); if (buf->i_len == len) { - memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); + memcpy(dst_efi_fmt, src_efi_fmt, + offsetof(struct xfs_efi_log_format, efi_extents)); + for (i = 0; i < src_efi_fmt->efi_nextents; i++) + memcpy(&dst_efi_fmt->efi_extents[i], + &src_efi_fmt->efi_extents[i], + sizeof(struct xfs_extent)); return 0; } else if (buf->i_len == len32) { xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; @@ -256,7 +261,7 @@ xfs_efd_item_sizeof( struct xfs_efd_log_item *efdp) { return sizeof(xfs_efd_log_format_t) + - (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); + efdp->efd_format.efd_nextents * sizeof(xfs_extent_t); } STATIC void @@ -341,7 +346,7 @@ xfs_trans_get_efd( if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - (nextents - 1) * sizeof(struct xfs_extent), + nextents * sizeof(struct xfs_extent), 0); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, @@ -733,6 +738,12 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < + offsetof(struct xfs_efi_log_format, efi_extents)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + efip = xfs_efi_init(mp, efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { @@ -772,9 +783,9 @@ xlog_recover_efd_commit_pass2( efd_formatp = item->ri_buf[0].i_addr; ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || + (efd_formatp->efd_nextents * sizeof(xfs_extent_32_t)))) || (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); + (efd_formatp->efd_nextents * sizeof(xfs_extent_64_t))))); xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 19c1df00b48e8..9737b5a9f405e 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void) /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); @@ -146,6 +146,9 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f029c6702dda1..8485e3b37ca01 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2029,7 +2029,7 @@ xfs_init_caches(void) xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + - (XFS_EFD_MAX_FAST_EXTENTS - 1) * + XFS_EFD_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); if (!xfs_efd_cache) @@ -2037,7 +2037,7 @@ xfs_init_caches(void) xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + - (XFS_EFI_MAX_FAST_EXTENTS - 1) * + XFS_EFI_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); if (!xfs_efi_cache) From b65e08f83b119ae9345ed23d4da357a72b3cb55c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 28 Oct 2022 15:48:58 -0700 Subject: [PATCH 103/239] xfs: create a predicate to verify per-AG extents Create a predicate function to verify that a given agbno/blockcount pair fit entirely within a single allocation group and don't suffer mathematical overflows. Refactor the existng open-coded logic; we're going to add more calls to this function in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.h | 15 +++++++++++++++ fs/xfs/libxfs/xfs_alloc.c | 6 +----- fs/xfs/libxfs/xfs_refcount.c | 6 +----- fs/xfs/libxfs/xfs_rmap.c | 9 ++------- fs/xfs/scrub/alloc.c | 4 +--- fs/xfs/scrub/ialloc.c | 5 ++--- fs/xfs/scrub/refcount.c | 5 ++--- 7 files changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 517a138faa669..191b22b9a35bf 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) return true; } +static inline bool +xfs_verify_agbext( + struct xfs_perag *pag, + xfs_agblock_t agbno, + xfs_agblock_t len) +{ + if (agbno + len <= agbno) + return false; + + if (!xfs_verify_agbno(pag, agbno)) + return false; + + return xfs_verify_agbno(pag, agbno + len - 1); +} + /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6261599bb389a..de79f5d07f651 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -263,11 +263,7 @@ xfs_alloc_get_rec( goto out_bad_rec; /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, *bno)) - goto out_bad_rec; - if (*bno > *bno + *len) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, *bno + *len - 1)) + if (!xfs_verify_agbext(pag, *bno, *len)) goto out_bad_rec; return 0; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 831353ba96dc8..1a50ca53304a2 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -135,11 +135,7 @@ xfs_refcount_get_rec( } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, realstart)) - goto out_bad_rec; - if (realstart > realstart + irec->rc_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) + if (!xfs_verify_agbext(pag, realstart, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 094dfc897ebcd..b56aca1e7c66c 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -235,13 +235,8 @@ xfs_rmap_get_rec( goto out_bad_rec; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, irec->rm_startblock)) - goto out_bad_rec; - if (irec->rm_startblock > - irec->rm_startblock + irec->rm_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, - irec->rm_startblock + irec->rm_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rm_startblock, + irec->rm_blockcount)) goto out_bad_rec; } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index ab427b4d7fe0b..3b38f4e2a5373 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -100,9 +100,7 @@ xchk_allocbt_rec( bno = be32_to_cpu(rec->alloc.ar_startblock); len = be32_to_cpu(rec->alloc.ar_blockcount); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_allocbt_xref(bs->sc, bno, len); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index e1026e07bf946..e312be7cd3751 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -108,9 +108,8 @@ xchk_iallocbt_chunk( xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index c68b767dc08fe..9959397f797f1 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -354,9 +354,8 @@ xchk_refcountbt_rec( /* Check the extent. */ bno &= ~XFS_REFC_COW_START; - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (refcount == 0) From 3c5aaaced99912c9fb3352fc5af5b104df67d4aa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 21 Oct 2022 09:10:05 -0700 Subject: [PATCH 104/239] xfs: refactor all the EFI/EFD log item sizeof logic Refactor all the open-coded sizeof logic for EFI/EFD log item and log format structures into common helper functions whose names reflect the struct names. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 48 +++++++++++++++++++++++ fs/xfs/xfs_extfree_item.c | 69 ++++++++++------------------------ fs/xfs/xfs_extfree_item.h | 16 ++++++++ fs/xfs/xfs_super.c | 12 ++---- 4 files changed, 88 insertions(+), 57 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 2f41fa8477c9d..f13e0809dc63f 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -616,6 +616,14 @@ typedef struct xfs_efi_log_format { xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; +static inline size_t +xfs_efi_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efi_log_format_32 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ @@ -624,6 +632,14 @@ typedef struct xfs_efi_log_format_32 { xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; +static inline size_t +xfs_efi_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efi_log_format_64 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ @@ -632,6 +648,14 @@ typedef struct xfs_efi_log_format_64 { xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; +static inline size_t +xfs_efi_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose @@ -645,6 +669,14 @@ typedef struct xfs_efd_log_format { xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; +static inline size_t +xfs_efd_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efd_log_format_32 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ @@ -653,6 +685,14 @@ typedef struct xfs_efd_log_format_32 { xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; +static inline size_t +xfs_efd_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efd_log_format_64 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ @@ -661,6 +701,14 @@ typedef struct xfs_efd_log_format_64 { xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; +static inline size_t +xfs_efd_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * RUI/RUD (reverse mapping) log format definitions */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 466cc5c5cd33f..f7e52db8da667 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -66,27 +66,16 @@ xfs_efi_release( xfs_efi_item_free(efip); } -/* - * This returns the number of iovecs needed to log the given efi item. - * We only need 1 iovec for an efi item. It just logs the efi_log_format - * structure. - */ -static inline int -xfs_efi_item_sizeof( - struct xfs_efi_log_item *efip) -{ - return sizeof(struct xfs_efi_log_format) + - efip->efi_format.efi_nextents * sizeof(xfs_extent_t); -} - STATIC void xfs_efi_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip)); + *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents); } /* @@ -112,7 +101,7 @@ xfs_efi_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, - xfs_efi_item_sizeof(efip)); + xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -155,13 +144,11 @@ xfs_efi_init( { struct xfs_efi_log_item *efip; - uint size; ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(struct xfs_efi_log_item) + - (nextents * sizeof(xfs_extent_t))); - efip = kmem_zalloc(size, 0); + efip = kzalloc(xfs_efi_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -188,12 +175,9 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) { xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; - uint len = sizeof(xfs_efi_log_format_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_t); - uint len32 = sizeof(xfs_efi_log_format_32_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_32_t); - uint len64 = sizeof(xfs_efi_log_format_64_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_64_t); + uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); + uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); + uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); if (buf->i_len == len) { memcpy(dst_efi_fmt, src_efi_fmt, @@ -251,27 +235,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) kmem_cache_free(xfs_efd_cache, efdp); } -/* - * This returns the number of iovecs needed to log the given efd item. - * We only need 1 iovec for an efd item. It just logs the efd_log_format - * structure. - */ -static inline int -xfs_efd_item_sizeof( - struct xfs_efd_log_item *efdp) -{ - return sizeof(xfs_efd_log_format_t) + - efdp->efd_format.efd_nextents * sizeof(xfs_extent_t); -} - STATIC void xfs_efd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip)); + *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents); } /* @@ -296,7 +269,7 @@ xfs_efd_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, - xfs_efd_item_sizeof(efdp)); + xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } /* @@ -345,9 +318,8 @@ xfs_trans_get_efd( ASSERT(nextents > 0); if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - nextents * sizeof(struct xfs_extent), - 0); + efdp = kzalloc(xfs_efd_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -738,8 +710,7 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len < - offsetof(struct xfs_efi_log_format, efi_extents)) { + if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } @@ -782,10 +753,10 @@ xlog_recover_efd_commit_pass2( struct xfs_efd_log_format *efd_formatp; efd_formatp = item->ri_buf[0].i_addr; - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - (efd_formatp->efd_nextents * sizeof(xfs_extent_32_t)))) || - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - (efd_formatp->efd_nextents * sizeof(xfs_extent_64_t))))); + ASSERT(item->ri_buf[0].i_len == xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) || + item->ri_buf[0].i_len == xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)); xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 186d0f2137f1f..da6a5afa607cf 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -52,6 +52,14 @@ struct xfs_efi_log_item { xfs_efi_log_format_t efi_format; }; +static inline size_t +xfs_efi_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efi_log_item, efi_format) + + xfs_efi_log_format_sizeof(nr); +} + /* * This is the "extent free done" log item. It is used to log * the fact that some extents earlier mentioned in an efi item @@ -64,6 +72,14 @@ struct xfs_efd_log_item { xfs_efd_log_format_t efd_format; }; +static inline size_t +xfs_efd_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efd_log_item, efd_format) + + xfs_efd_log_format_sizeof(nr); +} + /* * Max number of extents in fast allocation path. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8485e3b37ca01..ee4b429a2f2c9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2028,18 +2028,14 @@ xfs_init_caches(void) goto out_destroy_trans_cache; xfs_efd_cache = kmem_cache_create("xfs_efd_item", - (sizeof(struct xfs_efd_log_item) + - XFS_EFD_MAX_FAST_EXTENTS * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efd_cache) goto out_destroy_buf_item_cache; xfs_efi_cache = kmem_cache_create("xfs_efi_item", - (sizeof(struct xfs_efi_log_item) + - XFS_EFI_MAX_FAST_EXTENTS * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efi_cache) goto out_destroy_efd_cache; From 8edbe0cf8b4bbe2cf47513998641797b0aca8ee2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 11:33:47 -0700 Subject: [PATCH 105/239] xfs: check deferred refcount op continuation parameters If we're in the middle of a deferred refcount operation and decide to roll the transaction to avoid overflowing the transaction space, we need to check the new agbno/aglen parameters that we're about to record in the new intent. Specifically, we need to check that the new extent is completely within the filesystem, and that continuation does not put us into a different AG. If the keys of a node block are wrong, the lookup to resume an xfs_refcount_adjust_extents operation can put us into the wrong record block. If this happens, we might not find that we run out of aglen at an exact record boundary, which will cause the loop control to do the wrong thing. The previous patch should take care of that problem, but let's add this extra sanity check to stop corruption problems sooner than later. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 1a50ca53304a2..542f749d0c6a0 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1134,6 +1134,32 @@ xfs_refcount_finish_one_cleanup( xfs_trans_brelse(tp, agbp); } +/* + * Set up a continuation a deferred refcount operation by updating the intent. + * Checks to make sure we're not going to run off the end of the AG. + */ +static inline int +xfs_refcount_continue_op( + struct xfs_btree_cur *cur, + xfs_fsblock_t startblock, + xfs_agblock_t new_agbno, + xfs_extlen_t new_len, + xfs_fsblock_t *new_fsbno) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; + + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + return -EFSCORRUPTED; + + *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + + ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + + return 0; +} + /* * Process one of the deferred refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. @@ -1201,12 +1227,20 @@ xfs_refcount_finish_one( case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_INCREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_DECREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; From 921ed96b4f4e3bd19da7f775f39234226e6647e7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 15:14:06 -0700 Subject: [PATCH 106/239] xfs: actually abort log recovery on corrupt intent-done log items If log recovery picks up intent-done log items that are not of the correct size it needs to abort recovery and fail the mount. Debug assertions are not good enough. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_extfree_item.c | 20 ++++++++++++++++---- fs/xfs/xfs_rmap_item.c | 6 +++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f7e52db8da667..18c2243513434 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -751,12 +751,24 @@ xlog_recover_efd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; + int buflen = item->ri_buf[0].i_len; efd_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == xfs_efd_log_format32_sizeof( - efd_formatp->efd_nextents) || - item->ri_buf[0].i_len == xfs_efd_log_format64_sizeof( - efd_formatp->efd_nextents)); + + if (buflen < sizeof(struct xfs_efd_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } + + if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) && + item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 27047e73f5829..5a360c384ea5c 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -707,7 +707,11 @@ xlog_recover_rud_commit_pass2( struct xfs_rud_log_format *rud_formatp; rud_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); + if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + rud_formatp, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id); return 0; From 9e7e2436c159490fbbadbc4b5a4ee6bc30dae02e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 08:47:59 -0700 Subject: [PATCH 107/239] xfs: move _irec structs to xfs_types.h Structure definitions for incore objects do not belong in the ondisk format header. Move them to the incore types header where they belong. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 20 -------------------- fs/xfs/libxfs/xfs_types.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b55bdfa9c8a8c..005dd65b71cd6 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1564,20 +1564,6 @@ struct xfs_rmap_rec { #define RMAPBT_UNUSED_OFFSET_BITLEN 7 #define RMAPBT_OFFSET_BITLEN 54 -#define XFS_RMAP_ATTR_FORK (1 << 0) -#define XFS_RMAP_BMBT_BLOCK (1 << 1) -#define XFS_RMAP_UNWRITTEN (1 << 2) -#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ - XFS_RMAP_BMBT_BLOCK) -#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) -struct xfs_rmap_irec { - xfs_agblock_t rm_startblock; /* extent start block */ - xfs_extlen_t rm_blockcount; /* extent length */ - uint64_t rm_owner; /* extent owner */ - uint64_t rm_offset; /* offset within the owner */ - unsigned int rm_flags; /* state flags */ -}; - /* * Key structure * @@ -1640,12 +1626,6 @@ struct xfs_refcount_key { __be32 rc_startblock; /* starting block number */ }; -struct xfs_refcount_irec { - xfs_agblock_t rc_startblock; /* starting block number */ - xfs_extlen_t rc_blockcount; /* count of free blocks */ - xfs_nlink_t rc_refcount; /* number of inodes linked here */ -}; - #define MAXREFCOUNT ((xfs_nlink_t)~0U) #define MAXREFCEXTLEN ((xfs_extlen_t)~0U) diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index a6b7d98cf68fa..2d9ebc7338b11 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,6 +166,26 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +struct xfs_refcount_irec { + xfs_agblock_t rc_startblock; /* starting block number */ + xfs_extlen_t rc_blockcount; /* count of free blocks */ + xfs_nlink_t rc_refcount; /* number of inodes linked here */ +}; + +#define XFS_RMAP_ATTR_FORK (1 << 0) +#define XFS_RMAP_BMBT_BLOCK (1 << 1) +#define XFS_RMAP_UNWRITTEN (1 << 2) +#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ + XFS_RMAP_BMBT_BLOCK) +#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) +struct xfs_rmap_irec { + xfs_agblock_t rm_startblock; /* extent start block */ + xfs_extlen_t rm_blockcount; /* extent length */ + uint64_t rm_owner; /* extent owner */ + uint64_t rm_offset; /* offset within the owner */ + unsigned int rm_flags; /* state flags */ +}; + /* per-AG block reservation types */ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, From 950f0d50ee7138d7e631aefea8528d485426eda6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 15:07:14 -0700 Subject: [PATCH 108/239] xfs: dump corrupt recovered log intent items to dmesg consistently If log recovery decides that an intent item is corrupt and wants to abort the mount, capture a hexdump of the corrupt log item in the kernel log for further analysis. Some of the log item code already did this, so we're fixing the rest to do it consistently. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_attr_item.c | 19 +++++++++++++------ fs/xfs/xfs_bmap_item.c | 12 ++++++++---- fs/xfs/xfs_extfree_item.c | 6 ++++-- fs/xfs/xfs_refcount_item.c | 16 +++++++++++----- fs/xfs/xfs_rmap_item.c | 10 +++++++--- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index ee8f678a10a17..2788a6f2edcdb 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -717,31 +717,37 @@ xlog_recover_attri_commit_pass2( /* Validate xfs_attri_log_format before the large memory allocation */ len = sizeof(struct xfs_attri_log_format); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attri_validate(mp, attri_formatp)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } /* Validate the attr name */ if (item->ri_buf[1].i_len != xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[1].i_addr, item->ri_buf[1].i_len); return -EFSCORRUPTED; } /* Validate the attr value, if present */ if (attri_formatp->alfi_value_len != 0) { if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, + item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -834,7 +840,8 @@ xlog_recover_attrd_commit_pass2( attrd_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index a1da6205252ba..41323da523d1a 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -644,18 +644,21 @@ xlog_recover_bui_commit_pass2( bui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -694,7 +697,8 @@ xlog_recover_bud_commit_pass2( bud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 18c2243513434..d5130d1fcfaea 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -216,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr, + buf->i_len); return -EFSCORRUPTED; } @@ -711,7 +712,8 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 24cf4c64ebaab..858e3e9eb4a85 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -523,7 +523,9 @@ xfs_cui_item_recover( type = refc_type; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -536,7 +538,8 @@ xfs_cui_item_recover( &new_fsb, &new_len, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - refc, sizeof(*refc)); + &cuip->cui_format, + sizeof(cuip->cui_format)); if (error) goto abort_error; @@ -658,13 +661,15 @@ xlog_recover_cui_commit_pass2( cui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -703,7 +708,8 @@ xlog_recover_cud_commit_pass2( cud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 5a360c384ea5c..534504ede1a33 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -557,7 +557,9 @@ xfs_rui_item_recover( type = XFS_RMAP_FREE; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -663,13 +665,15 @@ xlog_recover_rui_commit_pass2( rui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } From 5a8c345ca8b99a9f54b89991f2f6a20521cb05f4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Oct 2022 11:22:54 -0700 Subject: [PATCH 109/239] xfs: refactor refcount record usage in xchk_refcountbt_rec Consolidate the open-coded xfs_refcount_irec fields into an actual struct and use the existing _btrec_to_irec to decode the ondisk record. This will reduce code churn in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/refcount.c | 54 ++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 9959397f797f1..9e6b36ac80798 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -269,15 +269,13 @@ xchk_refcountbt_process_rmap_fragments( STATIC void xchk_refcountbt_xref_rmap( struct xfs_scrub *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_nlink_t refcount) + const struct xfs_refcount_irec *irec) { struct xchk_refcnt_check refchk = { - .sc = sc, - .bno = bno, - .len = len, - .refcount = refcount, + .sc = sc, + .bno = irec->rc_startblock, + .len = irec->rc_blockcount, + .refcount = irec->rc_refcount, .seen = 0, }; struct xfs_rmap_irec low; @@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap( /* Cross-reference with the rmapbt to confirm the refcount. */ memset(&low, 0, sizeof(low)); - low.rm_startblock = bno; + low.rm_startblock = irec->rc_startblock; memset(&high, 0xFF, sizeof(high)); - high.rm_startblock = bno + len - 1; + high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1; INIT_LIST_HEAD(&refchk.fragments); error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, @@ -302,7 +300,7 @@ xchk_refcountbt_xref_rmap( goto out_free; xchk_refcountbt_process_rmap_fragments(&refchk); - if (refcount != refchk.seen) + if (irec->rc_refcount != refchk.seen) xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); out_free: @@ -315,17 +313,16 @@ xchk_refcountbt_xref_rmap( /* Cross-reference with the other btrees. */ STATIC void xchk_refcountbt_xref( - struct xfs_scrub *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - xfs_nlink_t refcount) + struct xfs_scrub *sc, + const struct xfs_refcount_irec *irec) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xchk_xref_is_used_space(sc, agbno, len); - xchk_xref_is_not_inode_chunk(sc, agbno, len); - xchk_refcountbt_xref_rmap(sc, agbno, len, refcount); + xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount); + xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock, + irec->rc_blockcount); + xchk_refcountbt_xref_rmap(sc, irec); } /* Scrub a refcountbt record. */ @@ -334,34 +331,31 @@ xchk_refcountbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { + struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; bool has_cowflag; - bno = be32_to_cpu(rec->refc.rc_startblock); - len = be32_to_cpu(rec->refc.rc_blockcount); - refcount = be32_to_cpu(rec->refc.rc_refcount); + xfs_refcount_btrec_to_irec(rec, &irec); /* Only CoW records can have refcount == 1. */ - has_cowflag = (bno & XFS_REFC_COW_START); - if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) + has_cowflag = (irec.rc_startblock & XFS_REFC_COW_START); + if ((irec.rc_refcount == 1 && !has_cowflag) || + (irec.rc_refcount != 1 && has_cowflag)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (has_cowflag) - (*cow_blocks) += len; + (*cow_blocks) += irec.rc_blockcount; /* Check the extent. */ - bno &= ~XFS_REFC_COW_START; + irec.rc_startblock &= ~XFS_REFC_COW_START; - if (!xfs_verify_agbext(pag, bno, len)) + if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (refcount == 0) + if (irec.rc_refcount == 0) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xchk_refcountbt_xref(bs->sc, bno, len, refcount); + xchk_refcountbt_xref(bs->sc, &irec); return 0; } From 9a50ee4f8db6e4dd0d8d757b7adaf0591776860a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 09:06:24 -0700 Subject: [PATCH 110/239] xfs: track cow/shared record domains explicitly in xfs_refcount_irec Just prior to committing the reflink code into upstream, the xfs maintainer at the time requested that I find a way to shard the refcount records into two domains -- one for records tracking shared extents, and a second for tracking CoW staging extents. The idea here was to minimize mount time CoW reclamation by pushing all the CoW records to the right edge of the keyspace, and it was accomplished by setting the upper bit in rc_startblock. We don't allow AGs to have more than 2^31 blocks, so the bit was free. Unfortunately, this was a very late addition to the codebase, so most of the refcount record processing code still treats rc_startblock as a u32 and pays no attention to whether or not the upper bit (the cow flag) is set. This is a weakness is theoretically exploitable, since we're not fully validating the incoming metadata records. Fuzzing demonstrates practical exploits of this weakness. If the cow flag of a node block key record is corrupted, a lookup operation can go to the wrong record block and start returning records from the wrong cow/shared domain. This causes the math to go all wrong (since cow domain is still implicit in the upper bit of rc_startblock) and we can crash the kernel by tricking xfs into jumping into a nonexistent AG and tripping over xfs_perag_get(mp, ) returning NULL. To fix this, start tracking the domain as an explicit part of struct xfs_refcount_irec, adjust all refcount functions to check the domain of a returned record, and alter the function definitions to accept them where necessary. Found by fuzzing keys[2].cowflag = add in xfs/464. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 146 +++++++++++++++++++---------- fs/xfs/libxfs/xfs_refcount.h | 28 +++++- fs/xfs/libxfs/xfs_refcount_btree.c | 15 ++- fs/xfs/libxfs/xfs_types.h | 6 ++ fs/xfs/scrub/refcount.c | 23 ++--- 5 files changed, 151 insertions(+), 67 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 542f749d0c6a0..0f920eff34c4a 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, int xfs_refcount_lookup_le( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } @@ -63,13 +66,16 @@ xfs_refcount_lookup_le( int xfs_refcount_lookup_ge( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } @@ -80,13 +86,16 @@ xfs_refcount_lookup_ge( int xfs_refcount_lookup_eq( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } @@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { - irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); + uint32_t start; + + start = be32_to_cpu(rec->refc.rc_startblock); + if (start & XFS_REFC_COW_START) { + start &= ~XFS_REFC_COW_START; + irec->rc_domain = XFS_REFC_DOMAIN_COW; + } else { + irec->rc_domain = XFS_REFC_DOMAIN_SHARED; + } + + irec->rc_startblock = start; irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } @@ -114,7 +133,6 @@ xfs_refcount_get_rec( struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; - xfs_agblock_t realstart; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) @@ -124,18 +142,14 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging state */ - realstart = irec->rc_startblock; - if (realstart & XFS_REFC_COW_START) { - if (irec->rc_refcount != 1) - goto out_bad_rec; - realstart &= ~XFS_REFC_COW_START; - } else if (irec->rc_refcount < 2) { + /* handle special COW-staging domain */ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + goto out_bad_rec; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) goto out_bad_rec; - } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbext(pag, realstart, irec->rc_blockcount)) + if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) @@ -165,12 +179,17 @@ xfs_refcount_update( struct xfs_refcount_irec *irec) { union xfs_btree_rec rec; + uint32_t start; int error; trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); - rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec.refc.rc_startblock = cpu_to_be32(start); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); + error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, @@ -192,9 +211,12 @@ xfs_refcount_insert( int error; trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; + cur->bc_rec.rc.rc_domain = irec->rc_domain; + error = xfs_btree_insert(cur, i); if (error) goto out_error; @@ -240,7 +262,8 @@ xfs_refcount_delete( } if (error) goto out_error; - error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); + error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, + &found_rec); out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, @@ -339,6 +362,7 @@ xfs_refc_next( STATIC int xfs_refcount_split_extent( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t agbno, bool *shape_changed) { @@ -347,7 +371,7 @@ xfs_refcount_split_extent( int error; *shape_changed = false; - error = xfs_refcount_lookup_le(cur, agbno, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); if (error) goto out_error; if (!found_rec) @@ -419,8 +443,8 @@ xfs_refcount_merge_center_extents( * call removes the center and the second one removes the right * extent. */ - error = xfs_refcount_lookup_ge(cur, center->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_ge(cur, center->rc_domain, + center->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -447,8 +471,8 @@ xfs_refcount_merge_center_extents( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -489,8 +513,8 @@ xfs_refcount_merge_left_extent( /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cleft->rc_domain, + cleft->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -508,8 +532,8 @@ xfs_refcount_merge_left_extent( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -553,8 +577,8 @@ xfs_refcount_merge_right_extent( * remove it. */ if (cright->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cright->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cright->rc_domain, + cright->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -572,8 +596,8 @@ xfs_refcount_merge_right_extent( } /* Enlarge the right extent. */ - error = xfs_refcount_lookup_le(cur, right->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, right->rc_domain, + right->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -612,11 +636,17 @@ xfs_refcount_find_left_extents( int flags) { struct xfs_refcount_irec tmp; + enum xfs_refc_domain domain; int error; int found_rec; + if (flags & XFS_FIND_RCEXT_SHARED) + domain = XFS_REFC_DOMAIN_SHARED; + else + domain = XFS_REFC_DOMAIN_COW; + left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) goto out_error; if (!found_rec) @@ -667,6 +697,7 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = min(aglen, tmp.rc_startblock - agbno); cleft->rc_refcount = 1; + cleft->rc_domain = domain; } } else { /* @@ -676,6 +707,7 @@ xfs_refcount_find_left_extents( cleft->rc_startblock = agbno; cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; + cleft->rc_domain = domain; } trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft, agbno); @@ -701,11 +733,17 @@ xfs_refcount_find_right_extents( int flags) { struct xfs_refcount_irec tmp; + enum xfs_refc_domain domain; int error; int found_rec; + if (flags & XFS_FIND_RCEXT_SHARED) + domain = XFS_REFC_DOMAIN_SHARED; + else + domain = XFS_REFC_DOMAIN_COW; + right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); + error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) goto out_error; if (!found_rec) @@ -756,6 +794,7 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = right->rc_startblock - cright->rc_startblock; cright->rc_refcount = 1; + cright->rc_domain = domain; } } else { /* @@ -765,6 +804,7 @@ xfs_refcount_find_right_extents( cright->rc_startblock = agbno; cright->rc_blockcount = aglen; cright->rc_refcount = 1; + cright->rc_domain = domain; } trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right, agbno + aglen); @@ -929,7 +969,8 @@ xfs_refcount_adjust_extents( if (*aglen == 0) return 0; - error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, + &found_rec); if (error) goto out_error; @@ -941,6 +982,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_SHARED; } /* @@ -953,6 +995,8 @@ xfs_refcount_adjust_extents( tmp.rc_blockcount = min(*aglen, ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; + tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -987,7 +1031,8 @@ xfs_refcount_adjust_extents( break; /* Move the cursor to the start of ext. */ - error = xfs_refcount_lookup_ge(cur, *agbno, + error = xfs_refcount_lookup_ge(cur, + XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; @@ -1080,13 +1125,15 @@ xfs_refcount_adjust( /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno + aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1351,7 +1398,8 @@ xfs_refcount_find_shared( *flen = 0; /* Try to find a refcount extent that crosses the start */ - error = xfs_refcount_lookup_le(cur, agbno, &have); + error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, + &have); if (error) goto out_error; if (!have) { @@ -1499,17 +1547,18 @@ xfs_refcount_adjust_cow_extents( return 0; /* Find any overlapping refcount records */ - error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, + &found_rec); if (error) goto out_error; error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; if (!found_rec) { - ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + - XFS_REFC_COW_START; + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_COW; } switch (adj) { @@ -1524,6 +1573,8 @@ xfs_refcount_adjust_cow_extents( tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; + tmp.rc_domain = XFS_REFC_DOMAIN_COW; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -1586,16 +1637,16 @@ xfs_refcount_adjust_cow( bool shape_changed; int error; - agbno += XFS_REFC_COW_START; - /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno, &shape_changed); if (error) goto out_error; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno + aglen, &shape_changed); if (error) goto out_error; @@ -1731,7 +1782,6 @@ xfs_refcount_recover_cow_leftovers( union xfs_btree_irec low; union xfs_btree_irec high; xfs_fsblock_t fsb; - xfs_agblock_t agbno; int error; if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) @@ -1761,7 +1811,7 @@ xfs_refcount_recover_cow_leftovers( /* Find all the leftover CoW staging extents. */ memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); - low.rc.rc_startblock = XFS_REFC_COW_START; + low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; high.rc.rc_startblock = -1U; error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); @@ -1782,8 +1832,8 @@ xfs_refcount_recover_cow_leftovers( &rr->rr_rrec); /* Free the orphan record */ - agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; - fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); + fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, + rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); @@ -1814,6 +1864,7 @@ xfs_refcount_recover_cow_leftovers( int xfs_refcount_has_record( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, bool *exists) @@ -1825,6 +1876,7 @@ xfs_refcount_has_record( low.rc.rc_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.rc.rc_startblock = bno + len - 1; + low.rc.rc_domain = high.rc.rc_domain = domain; return xfs_btree_has_record(cur, &low, &high, exists); } diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index e8b322de7f3d9..3beb5a30a9c9e 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -14,14 +14,33 @@ struct xfs_bmbt_irec; struct xfs_refcount_irec; extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +static inline uint32_t +xfs_refcount_encode_startblock( + xfs_agblock_t startblock, + enum xfs_refc_domain domain) +{ + uint32_t start; + + /* + * low level btree operations need to handle the generic btree range + * query functions (which set rc_domain == -1U), so we check that the + * domain is /not/ shared. + */ + start = startblock & ~XFS_REFC_COW_START; + if (domain != XFS_REFC_DOMAIN_SHARED) + start |= XFS_REFC_COW_START; + + return start; +} + enum xfs_refcount_intent_type { XFS_REFCOUNT_INCREASE = 1, XFS_REFCOUNT_DECREASE, @@ -79,7 +98,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, #define XFS_REFCOUNT_ITEM_OVERHEAD 32 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, - xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + enum xfs_refc_domain domain, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 316c1ec0c3c26..e1f7898666831 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -13,6 +13,7 @@ #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" +#include "xfs_refcount.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock); + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec->refc.rc_startblock = cpu_to_be32(start); rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); } @@ -182,10 +188,13 @@ xfs_refcountbt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - struct xfs_refcount_irec *rec = &cur->bc_rec.rc; const struct xfs_refcount_key *kp = &key->refc; + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; - return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + return (int64_t)be32_to_cpu(kp->rc_startblock) - start; } STATIC int64_t diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 2d9ebc7338b11..eb9a98338bb98 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,10 +166,16 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +enum xfs_refc_domain { + XFS_REFC_DOMAIN_SHARED = 0, + XFS_REFC_DOMAIN_COW, +}; + struct xfs_refcount_irec { xfs_agblock_t rc_startblock; /* starting block number */ xfs_extlen_t rc_blockcount; /* count of free blocks */ xfs_nlink_t rc_refcount; /* number of inodes linked here */ + enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */ }; #define XFS_RMAP_ATTR_FORK (1 << 0) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 9e6b36ac80798..af5b796ec9ecb 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -334,21 +334,19 @@ xchk_refcountbt_rec( struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - bool has_cowflag; xfs_refcount_btrec_to_irec(rec, &irec); /* Only CoW records can have refcount == 1. */ - has_cowflag = (irec.rc_startblock & XFS_REFC_COW_START); - if ((irec.rc_refcount == 1 && !has_cowflag) || - (irec.rc_refcount != 1 && has_cowflag)) + if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED && irec.rc_refcount == 1) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (has_cowflag) + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) { + if (irec.rc_refcount != 1) + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); (*cow_blocks) += irec.rc_blockcount; + } /* Check the extent. */ - irec.rc_startblock &= ~XFS_REFC_COW_START; - if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); @@ -419,7 +417,6 @@ xchk_xref_is_cow_staging( xfs_extlen_t len) { struct xfs_refcount_irec rc; - bool has_cowflag; int has_refcount; int error; @@ -427,8 +424,8 @@ xchk_xref_is_cow_staging( return; /* Find the CoW staging extent. */ - error = xfs_refcount_lookup_le(sc->sa.refc_cur, - agbno + XFS_REFC_COW_START, &has_refcount); + error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, + agbno, &has_refcount); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { @@ -445,8 +442,7 @@ xchk_xref_is_cow_staging( } /* CoW flag must be set, refcount must be 1. */ - has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); - if (!has_cowflag || rc.rc_refcount != 1) + if (rc.rc_domain != XFS_REFC_DOMAIN_COW || rc.rc_refcount != 1) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ @@ -470,7 +466,8 @@ xchk_xref_is_not_shared( if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; - error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED, + agbno, len, &shared); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (shared) From 571423a162cd86acb1b010a01c6203369586daa6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:23:58 -0700 Subject: [PATCH 111/239] xfs: report refcount domain in tracepoints Now that we've broken out the startblock and shared/cow domain in the incore refcount extent record structure, update the tracepoints to report the domain. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_types.h | 4 ++++ fs/xfs/xfs_trace.h | 48 +++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index eb9a98338bb98..5ebdda7e10780 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -171,6 +171,10 @@ enum xfs_refc_domain { XFS_REFC_DOMAIN_COW, }; +#define XFS_REFC_DOMAIN_STRINGS \ + { XFS_REFC_DOMAIN_SHARED, "shared" }, \ + { XFS_REFC_DOMAIN_COW, "cow" } + struct xfs_refcount_irec { xfs_agblock_t rc_startblock; /* starting block number */ xfs_extlen_t rc_blockcount; /* count of free blocks */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cb7c81ba7fa38..372d871bccc5e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE); TRACE_DEFINE_ENUM(PE_SIZE_PMD); TRACE_DEFINE_ENUM(PE_SIZE_PUD); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); + TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, bool write_fault), @@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount) @@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount, @@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount) @@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, @@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) + __field(enum xfs_refc_domain, i3_domain) __field(xfs_agblock_t, i3_startblock) __field(xfs_extlen_t, i3_blockcount) __field(xfs_nlink_t, i3_refcount) @@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; + __entry->i3_domain = i3->rc_domain; __entry->i3_startblock = i3->rc_startblock; __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, + __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i3_startblock, __entry->i3_blockcount, __entry->i3_refcount) From f492135df0aa0417337f9b8b1cc6d6a994d61d25 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:31:27 -0700 Subject: [PATCH 112/239] xfs: refactor domain and refcount checking Create a helper function to ensure that CoW staging extent records have a single refcount and that shared extent records have more than 1 refcount. We'll put this to more use in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 5 +---- fs/xfs/libxfs/xfs_refcount.h | 12 ++++++++++++ fs/xfs/scrub/refcount.c | 10 ++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 0f920eff34c4a..8eaa11470f469 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -142,10 +142,7 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging domain */ - if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) - goto out_bad_rec; - if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + if (!xfs_refcount_check_domain(irec)) goto out_bad_rec; /* check for valid extent range, including overflow */ diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 3beb5a30a9c9e..ee32e8eb5a993 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -55,6 +55,18 @@ struct xfs_refcount_intent { xfs_fsblock_t ri_startblock; }; +/* Check that the refcount is appropriate for the record domain. */ +static inline bool +xfs_refcount_check_domain( + const struct xfs_refcount_irec *irec) +{ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + return false; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + return false; + return true; +} + void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index af5b796ec9ecb..fe5ffe4f478d8 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -337,14 +337,12 @@ xchk_refcountbt_rec( xfs_refcount_btrec_to_irec(rec, &irec); - /* Only CoW records can have refcount == 1. */ - if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED && irec.rc_refcount == 1) + /* Check the domain and refcount are not incompatible. */ + if (!xfs_refcount_check_domain(&irec)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (irec.rc_domain == XFS_REFC_DOMAIN_COW) { - if (irec.rc_refcount != 1) - xchk_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) (*cow_blocks) += irec.rc_blockcount; - } /* Check the extent. */ if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) From 68d0f389179a52555cfd8fa3254e4adcd7576904 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:42:48 -0700 Subject: [PATCH 113/239] xfs: remove XFS_FIND_RCEXT_SHARED and _COW Now that we have an explicit enum for shared and CoW staging extents, we can get rid of the old FIND_RCEXT flags. Omit a couple of conversions that disappear in the next patches. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 48 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 8eaa11470f469..ba2ddf177a498 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -617,8 +617,6 @@ xfs_refcount_merge_right_extent( return error; } -#define XFS_FIND_RCEXT_SHARED 1 -#define XFS_FIND_RCEXT_COW 2 /* * Find the left extent and the one after it (cleft). This function assumes * that we've already split any extent crossing agbno. @@ -628,20 +626,14 @@ xfs_refcount_find_left_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; - enum xfs_refc_domain domain; int error; int found_rec; - if (flags & XFS_FIND_RCEXT_SHARED) - domain = XFS_REFC_DOMAIN_SHARED; - else - domain = XFS_REFC_DOMAIN_COW; - left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) @@ -659,9 +651,9 @@ xfs_refcount_find_left_extents( if (xfs_refc_next(&tmp) != agbno) return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -725,20 +717,14 @@ xfs_refcount_find_right_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; - enum xfs_refc_domain domain; int error; int found_rec; - if (flags & XFS_FIND_RCEXT_SHARED) - domain = XFS_REFC_DOMAIN_SHARED; - else - domain = XFS_REFC_DOMAIN_COW; - right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) @@ -756,9 +742,9 @@ xfs_refcount_find_right_extents( if (tmp.rc_startblock != agbno + aglen) return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -827,10 +813,10 @@ xfs_refc_valid( STATIC int xfs_refcount_merge_extents( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adjust, - int flags, bool *shape_changed) { struct xfs_refcount_irec left = {0}, cleft = {0}; @@ -845,12 +831,12 @@ xfs_refcount_merge_extents( * just below (agbno + aglen) [cright], and just above (agbno + aglen) * [right]. */ - error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, - *aglen, flags); + error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, + *agbno, *aglen); if (error) return error; - error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, - *aglen, flags); + error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, + *agbno, *aglen); if (error) return error; @@ -1139,8 +1125,8 @@ xfs_refcount_adjust( /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, - XFS_FIND_RCEXT_SHARED, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, + new_agbno, new_aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1650,8 +1636,8 @@ xfs_refcount_adjust_cow( /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, - XFS_FIND_RCEXT_COW, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, + &aglen, adj, &shape_changed); if (error) goto out_error; From f62ac3e0ac33d366fe81e194fee81de9be2cd886 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:16:36 -0700 Subject: [PATCH 114/239] xfs: check record domain when accessing refcount records Now that we've separated the startblock and CoW/shared extent domain in the incore refcount record structure, check the domain whenever we retrieve a record to ensure that it's still in the domain that we want. Depending on the circumstances, a change in domain either means we're done processing or that we've found a corruption and need to fail out. The refcount check in xchk_xref_is_cow_staging is redundant since _get_rec has done that for a long time now, so we can get rid of it. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 53 ++++++++++++++++++++++++++++-------- fs/xfs/scrub/refcount.c | 4 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index ba2ddf177a498..27ed4c10d0d0d 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -381,6 +381,8 @@ xfs_refcount_split_extent( error = -EFSCORRUPTED; goto out_error; } + if (rcext.rc_domain != domain) + return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -432,6 +434,9 @@ xfs_refcount_merge_center_extents( trace_xfs_refcount_merge_center_extents(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, center, right); + ASSERT(left->rc_domain == center->rc_domain); + ASSERT(right->rc_domain == center->rc_domain); + /* * Make sure the center and right extents are not in the btree. * If the center extent was synthesized, the first delete call @@ -508,6 +513,8 @@ xfs_refcount_merge_left_extent( trace_xfs_refcount_merge_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft); + ASSERT(left->rc_domain == cleft->rc_domain); + /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { error = xfs_refcount_lookup_le(cur, cleft->rc_domain, @@ -569,6 +576,8 @@ xfs_refcount_merge_right_extent( trace_xfs_refcount_merge_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right); + ASSERT(right->rc_domain == cright->rc_domain); + /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, * remove it. @@ -649,11 +658,9 @@ xfs_refcount_find_left_extents( goto out_error; } - if (xfs_refc_next(&tmp) != agbno) - return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) + if (tmp.rc_domain != domain) return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) + if (xfs_refc_next(&tmp) != agbno) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -670,6 +677,9 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; @@ -689,6 +699,7 @@ xfs_refcount_find_left_extents( cleft->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -740,11 +751,9 @@ xfs_refcount_find_right_extents( goto out_error; } - if (tmp.rc_startblock != agbno + aglen) - return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) + if (tmp.rc_domain != domain) return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) + if (tmp.rc_startblock != agbno + aglen) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -761,6 +770,9 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; @@ -780,6 +792,7 @@ xfs_refcount_find_right_extents( cright->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -889,7 +902,7 @@ xfs_refcount_merge_extents( aglen); } - return error; + return 0; } /* @@ -961,7 +974,7 @@ xfs_refcount_adjust_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; - if (!found_rec) { + if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; @@ -1400,6 +1413,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1415,6 +1430,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; } /* If the extent starts after the range we want, bail out */ @@ -1446,7 +1463,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } - if (tmp.rc_startblock >= agbno + aglen || + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || + tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); @@ -1537,6 +1555,11 @@ xfs_refcount_adjust_cow_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && + ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + error = -EFSCORRUPTED; + goto out_error; + } if (!found_rec) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; @@ -1746,8 +1769,14 @@ xfs_refcount_recover_extent( rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - list_add_tail(&rr->rr_list, debris); + if (XFS_IS_CORRUPT(cur->bc_mp, + rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + kmem_free(rr); + return -EFSCORRUPTED; + } + + list_add_tail(&rr->rr_list, debris); return 0; } diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index fe5ffe4f478d8..a26ee0f24ef2a 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -439,8 +439,8 @@ xchk_xref_is_cow_staging( return; } - /* CoW flag must be set, refcount must be 1. */ - if (rc.rc_domain != XFS_REFC_DOMAIN_COW || rc.rc_refcount != 1) + /* CoW lookup returned a shared extent record? */ + if (rc.rc_domain != XFS_REFC_DOMAIN_COW) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ From f1fdc8207840672a46f26414f2c989ec078a153b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 27 Oct 2022 09:48:59 -0700 Subject: [PATCH 115/239] xfs: fix agblocks check in the cow leftover recovery function As we've seen, refcount records use the upper bit of the rc_startblock field to ensure that all the refcount records are at the right side of the refcount btree. This works because an AG is never allowed to have more than (1U << 31) blocks in it. If we ever encounter a filesystem claiming to have that many blocks, we absolutely do not want reflink touching it at all. However, this test at the start of xfs_refcount_recover_cow_leftovers is slightly incorrect -- it /should/ be checking that agblocks isn't larger than the XFS_MAX_CRC_AG_BLOCKS constant, and it should check that the constant is never large enough to conflict with that CoW flag. Note that the V5 superblock verifier has not historically rejected filesystems where agblocks >= XFS_MAX_CRC_AG_BLOCKS, which is why this ended up in the COW recovery routine. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 27ed4c10d0d0d..ad0fb6a7177b4 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1796,7 +1796,9 @@ xfs_refcount_recover_cow_leftovers( xfs_fsblock_t fsb; int error; - if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) + /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COW_START); + if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; INIT_LIST_HEAD(&debris); From c1ccf967bf962b998f0c096e06a658ece27d10a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:55:04 -0700 Subject: [PATCH 116/239] xfs: fix uninitialized list head in struct xfs_refcount_recovery We're supposed to initialize the list head of an object before adding it to another list. Fix that, and stop using the kmem_{alloc,free} calls from the Irix days. Fixes: 174edb0e46e5 ("xfs: store in-progress CoW allocations in the refcount btree") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index ad0fb6a7177b4..44d4667d43016 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1767,12 +1767,14 @@ xfs_refcount_recover_extent( be32_to_cpu(rec->refc.rc_refcount) != 1)) return -EFSCORRUPTED; - rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + rr = kmalloc(sizeof(struct xfs_refcount_recovery), + GFP_KERNEL | __GFP_NOFAIL); + INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); if (XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { - kmem_free(rr); + kfree(rr); return -EFSCORRUPTED; } @@ -1859,7 +1861,7 @@ xfs_refcount_recover_cow_leftovers( goto out_free; list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; @@ -1869,7 +1871,7 @@ xfs_refcount_recover_cow_leftovers( /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; } From 8b972158afcaa66c538c3ee1d394f096fcd238a8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 11:13:20 -0700 Subject: [PATCH 117/239] xfs: rename XFS_REFC_COW_START to _COWFLAG We've been (ab)using XFS_REFC_COW_START as both an integer quantity and a bit flag, even though it's *only* a bit flag. Rename the variable to reflect its nature and update the cast target since we're not supposed to be comparing it to xfs_agblock_t now. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 2 +- fs/xfs/libxfs/xfs_refcount.c | 6 +++--- fs/xfs/libxfs/xfs_refcount.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 005dd65b71cd6..371dc07233e05 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1612,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp); * on the startblock. This speeds up mount time deletion of stale * staging extents because they're all at the right side of the tree. */ -#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31)) +#define XFS_REFC_COWFLAG (1U << 31) #define REFCNTBT_COWFLAG_BITLEN 1 #define REFCNTBT_AGBLOCK_BITLEN 31 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 44d4667d43016..3f34bafe18dd1 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -108,8 +108,8 @@ xfs_refcount_btrec_to_irec( uint32_t start; start = be32_to_cpu(rec->refc.rc_startblock); - if (start & XFS_REFC_COW_START) { - start &= ~XFS_REFC_COW_START; + if (start & XFS_REFC_COWFLAG) { + start &= ~XFS_REFC_COWFLAG; irec->rc_domain = XFS_REFC_DOMAIN_COW; } else { irec->rc_domain = XFS_REFC_DOMAIN_SHARED; @@ -1799,7 +1799,7 @@ xfs_refcount_recover_cow_leftovers( int error; /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ - BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COW_START); + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index ee32e8eb5a993..452f30556f5a9 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -34,9 +34,9 @@ xfs_refcount_encode_startblock( * query functions (which set rc_domain == -1U), so we check that the * domain is /not/ shared. */ - start = startblock & ~XFS_REFC_COW_START; + start = startblock & ~XFS_REFC_COWFLAG; if (domain != XFS_REFC_DOMAIN_SHARED) - start |= XFS_REFC_COW_START; + start |= XFS_REFC_COWFLAG; return start; } From 40ff21432883216aa440b6619d559ad8f7d7a7d9 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 31 Oct 2022 14:23:13 +0100 Subject: [PATCH 118/239] asm-generic: compat: fix compat_arg_u64() and compat_arg_u64_dual() The macros are defined backwards. This affects the following compat syscalls: - compat_sys_truncate64() - compat_sys_ftruncate64() - compat_sys_fallocate() - compat_sys_sync_file_range() - compat_sys_fadvise64_64() - compat_sys_readahead() - compat_sys_pread64() - compat_sys_pwrite64() Fixes: 43d5de2b67d7 ("asm-generic: compat: Support BE for long long args in 32-bit ABIs") Signed-off-by: Andreas Schwab [mpe: Add list of affected syscalls] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/871qqoyvni.fsf_-_@igel.home --- include/asm-generic/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h index aeb257ad3d1a6..8392caea398f4 100644 --- a/include/asm-generic/compat.h +++ b/include/asm-generic/compat.h @@ -15,7 +15,7 @@ #endif #ifndef compat_arg_u64 -#ifdef CONFIG_CPU_BIG_ENDIAN +#ifndef CONFIG_CPU_BIG_ENDIAN #define compat_arg_u64(name) u32 name##_lo, u32 name##_hi #define compat_arg_u64_dual(name) u32, name##_lo, u32, name##_hi #else From ce883a2ba310cd7c291bb66ce5d207965fca6003 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 31 Oct 2022 15:47:35 +0100 Subject: [PATCH 119/239] powerpc/32: fix syscall wrappers with 64-bit arguments With the introduction of syscall wrappers all wrappers for syscalls with 64-bit arguments must be handled specially, not only those that have unaligned 64-bit arguments. This left out the fallocate() and sync_file_range2() syscalls. Fixes: 7e92e01b7245 ("powerpc: Provide syscall wrapper") Fixes: e23750623835 ("powerpc/32: fix syscall wrappers with 64-bit arguments of unaligned register-pairs") Signed-off-by: Andreas Schwab Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/87mt9cxd6g.fsf_-_@igel.home --- arch/powerpc/include/asm/syscalls.h | 7 +++++++ arch/powerpc/kernel/sys_ppc32.c | 13 ++++++++++++- arch/powerpc/kernel/syscalls/syscall.tbl | 7 +++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index a1142496cd588..6d51b007b59e5 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -104,6 +104,13 @@ long sys_ppc_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, unsigned long len2); long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, size_t len, int advice); +long sys_ppc_sync_file_range2(int fd, unsigned int flags, + unsigned int offset1, + unsigned int offset2, + unsigned int nbytes1, + unsigned int nbytes2); +long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2, + u32 len1, u32 len2); #endif #ifdef CONFIG_COMPAT long compat_sys_mmap2(unsigned long addr, size_t len, diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 1ab4a4d95abaf..d451a8229223a 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -112,7 +112,7 @@ PPC32_SYSCALL_DEFINE6(ppc32_fadvise64, advice); } -COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2, +PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2, int, fd, unsigned int, flags, unsigned int, offset1, unsigned int, offset2, unsigned int, nbytes1, unsigned int, nbytes2) @@ -122,3 +122,14 @@ COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2, return ksys_sync_file_range(fd, offset, nbytes, flags); } + +#ifdef CONFIG_PPC32 +SYSCALL_DEFINE6(ppc_fallocate, + int, fd, int, mode, + u32, offset1, u32, offset2, u32, len1, u32, len2) +{ + return ksys_fallocate(fd, mode, + merge_64(offset1, offset2), + merge_64(len1, len2)); +} +#endif diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index e9e0df4f9a61a..a0be127475b1f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -394,8 +394,11 @@ 305 common signalfd sys_signalfd compat_sys_signalfd 306 common timerfd_create sys_timerfd_create 307 common eventfd sys_eventfd -308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2 -309 nospu fallocate sys_fallocate compat_sys_fallocate +308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2 +308 64 sync_file_range2 sys_sync_file_range2 +308 spu sync_file_range2 sys_sync_file_range2 +309 32 fallocate sys_ppc_fallocate compat_sys_fallocate +309 64 fallocate sys_fallocate 310 nospu subpage_prot sys_subpage_prot 311 32 timerfd_settime sys_timerfd_settime32 311 64 timerfd_settime sys_timerfd_settime From 878eb6e48f240d02ed1c9298020a0b6370695f24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Nov 2022 00:54:13 +0000 Subject: [PATCH 120/239] block: blk_add_rq_to_plug(): clear stale 'last' after flush blk_mq_flush_plug_list() empties ->mq_list and request we'd peeked there before that call is gone; in any case, we are not dealing with a mix of requests for different queues now - there's no requests left in the plug. Signed-off-by: Al Viro Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 21cc7c2da0f96..6a789cda68a5b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) (!blk_queue_nomerges(rq->q) && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_mq_flush_plug_list(plug, false); + last = NULL; trace_block_plug(rq->q); } From e230d36f7d4cf1b89614e2bb4c2f0c55a16d3259 Mon Sep 17 00:00:00 2001 From: Rick Lindsley Date: Fri, 28 Oct 2022 13:35:11 -0700 Subject: [PATCH 121/239] ibmvnic: change maintainers for vnic driver Changed maintainers for vnic driver, since Dany has new responsibilities. Also added Nick Child as reviewer. Signed-off-by: Rick Lindsley Link: https://lore.kernel.org/r/20221028203509.4070154-1-ricklind@us.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9e437612dd816..9b297be5c6ed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9775,7 +9775,10 @@ S: Supported F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver -M: Dany Madden +M: Haren Myneni +M: Rick Lindsley +R: Nick Child +R: Dany Madden R: Thomas Falcon L: netdev@vger.kernel.org S: Supported From 363a5328f4b0517e59572118ccfb7c626d81dca9 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 29 Oct 2022 17:41:01 +0800 Subject: [PATCH 122/239] net: tun: fix bugs for oversize packet when napi frags enabled Recently, we got two syzkaller problems because of oversize packet when napi frags enabled. One of the problems is because the first seg size of the iov_iter from user space is very big, it is 2147479538 which is bigger than the threshold value for bail out early in __alloc_pages(). And skb->pfmemalloc is true, __kmalloc_reserve() would use pfmemalloc reserves without __GFP_NOWARN flag. Thus we got a warning as following: ======================================================== WARNING: CPU: 1 PID: 17965 at mm/page_alloc.c:5295 __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 ... Call trace: __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 __alloc_pages_node include/linux/gfp.h:550 [inline] alloc_pages_node include/linux/gfp.h:564 [inline] kmalloc_large_node+0x94/0x350 mm/slub.c:4038 __kmalloc_node_track_caller+0x620/0x8e4 mm/slub.c:4545 __kmalloc_reserve.constprop.0+0x1e4/0x2b0 net/core/skbuff.c:151 pskb_expand_head+0x130/0x8b0 net/core/skbuff.c:1654 __skb_grow include/linux/skbuff.h:2779 [inline] tun_napi_alloc_frags+0x144/0x610 drivers/net/tun.c:1477 tun_get_user+0x31c/0x2010 drivers/net/tun.c:1835 tun_chr_write_iter+0x98/0x100 drivers/net/tun.c:2036 The other problem is because odd IPv6 packets without NEXTHDR_NONE extension header and have big packet length, it is 2127925 which is bigger than ETH_MAX_MTU(65535). After ipv6_gso_pull_exthdrs() in ipv6_gro_receive(), network_header offset and transport_header offset are all bigger than U16_MAX. That would trigger skb->network_header and skb->transport_header overflow error, because they are all '__u16' type. Eventually, it would affect the value for __skb_push(skb, value), and make it be a big value. After __skb_push() in ipv6_gro_receive(), skb->data would less than skb->head, an out of bounds memory bug occurred. That would trigger the problem as following: ================================================================== BUG: KASAN: use-after-free in eth_type_trans+0x100/0x260 ... Call trace: dump_backtrace+0xd8/0x130 show_stack+0x1c/0x50 dump_stack_lvl+0x64/0x7c print_address_description.constprop.0+0xbc/0x2e8 print_report+0x100/0x1e4 kasan_report+0x80/0x120 __asan_load8+0x78/0xa0 eth_type_trans+0x100/0x260 napi_gro_frags+0x164/0x550 tun_get_user+0xda4/0x1270 tun_chr_write_iter+0x74/0x130 do_iter_readv_writev+0x130/0x1ec do_iter_write+0xbc/0x1e0 vfs_writev+0x13c/0x26c To fix the problems, restrict the packet size less than (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN) which has considered reserved skb space in napi_alloc_skb() because transport_header is an offset from skb->head. Add len check in tun_napi_alloc_frags() simply. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221029094101.1653855-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 27c6d235cbda3..946628050f282 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1459,7 +1459,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int err; int i; - if (it->nr_segs > MAX_SKB_FRAGS + 1) + if (it->nr_segs > MAX_SKB_FRAGS + 1 || + len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE); local_bh_disable(); From 02a771c9a68a9f08cce4ec5e324fb1bc4dce7202 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Nov 2022 14:48:52 +1100 Subject: [PATCH 123/239] powerpc/32: Select ARCH_SPLIT_ARG64 On 32-bit kernels, 64-bit syscall arguments are split into two registers. For that to work with syscall wrappers, the prototype of the syscall must have the argument split so that the wrapper macro properly unpacks the arguments from pt_regs. The fanotify_mark() syscall is one such syscall, which already has a split prototype, guarded behind ARCH_SPLIT_ARG64. So select ARCH_SPLIT_ARG64 to get that prototype and fix fanotify_mark() on 32-bit kernels with syscall wrappers. Note also that fanotify_mark() is the only usage of ARCH_SPLIT_ARG64. Fixes: 7e92e01b7245 ("powerpc: Provide syscall wrapper") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221101034852.2340319-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 20fb1765238c4..2ca5418457ed2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + select ARCH_SPLIT_ARG64 if PPC32 select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x From d4bc8271db21ea9f1c86a1ca4d64999f184d4aae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Oct 2022 09:52:36 +0200 Subject: [PATCH 124/239] netfilter: nf_tables: netlink notifier might race to release objects commit release path is invoked via call_rcu and it runs lockless to release the objects after rcu grace period. The netlink notifier handler might win race to remove objects that the transaction context is still referencing from the commit release path. Call rcu_barrier() to ensure pending rcu callbacks run to completion if the list of transactions to be destroyed is not empty. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Reported-by: syzbot+8f747f62763bc6c32916@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 58d9cbc9ccdc7..2197118aa7b09 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10030,6 +10030,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, nft_net = nft_pernet(net); deleted = 0; mutex_lock(&nft_net->commit_mutex); + if (!list_empty(&nf_tables_destroy_list)) + rcu_barrier(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && From 26b5934ff4194e13196bedcba373cd4915071d0e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Oct 2022 09:54:45 +0200 Subject: [PATCH 125/239] netfilter: nf_tables: release flow rule object from commit path No need to postpone this to the commit release path, since no packets are walking over this object, this is accessed from control plane only. This helped uncovered UAF triggered by races with the netlink notifier. Fixes: 9dd732e0bdf5 ("netfilter: nf_tables: memleak flow rule from commit path") Reported-by: syzbot+8f747f62763bc6c32916@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2197118aa7b09..76bd4d03dbda4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8465,9 +8465,6 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) - nft_flow_rule_destroy(nft_trans_flow_rule(trans)); - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: @@ -8973,6 +8970,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); + + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); From cd73adcdbad3d9e9923b045d3643409e9c148d17 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 30 Oct 2022 22:08:54 +0100 Subject: [PATCH 126/239] ARM: dts: ux500: Add trips to battery thermal zones Recent changes to the thermal framework has made the trip points (trips) for thermal zones compulsory, which made the Ux500 DTS files break validation and also stopped probing because of similar changes to the code. Fix this by adding an "outer bounding box": battery thermal zones should not get warmer than 70 degress, then we will shut down. Fixes: 8c596324232d ("dt-bindings: thermal: Fix missing required property") Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Signed-off-by: Linus Walleij Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Link: https://lore.kernel.org/r/20221030210854.346662-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-href.dtsi | 8 ++++++++ arch/arm/boot/dts/ste-snowball.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-codina.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-gavini.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-janice.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-kyle.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-skomer.dts | 8 ++++++++ 9 files changed, 72 insertions(+) diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index fbaa0ce464271..8f1bb78fc1e48 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -24,6 +24,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 1c9094f248939..e2f0cdacba7d1 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -28,6 +28,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts index d6940e0afa863..27a3ab7e25e13 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts @@ -44,6 +44,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts index 5f41256d7f4b4..b88f0c07873dd 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts @@ -57,6 +57,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts index 806da3fc33cd7..7231bc7452000 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index b0dce91aff4be..9604695edf530 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -35,6 +35,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts index ed5c79c3d04b0..69387e8754a95 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts b/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts index c57676faf181b..167846df31045 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts @@ -34,6 +34,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts index 81b341a5ae451..93e5f5ed888d1 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; From a0e215088ef4955f442af58822da5d2bcc3fbbe7 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Fri, 28 Oct 2022 18:54:34 +0800 Subject: [PATCH 127/239] MAINTAINERS: Update HiSilicon LPC BUS Driver maintainer Add Jay Fang as the maintainer of the HiSilicon LPC BUS Driver, replacing John Garry. Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/20221028105434.1661264-1-f.fangjian@huawei.com' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8ae6d1a81824f..d3b729b3cbfcf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9214,7 +9214,7 @@ W: https://www.hisilicon.com F: drivers/i2c/busses/i2c-hisi.c HISILICON LPC BUS DRIVER -M: john.garry@huawei.com +M: Jay Fang S: Maintained W: http://www.hisilicon.com F: Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml From 3f4071cbd2063b917486d1047a4da47718215fee Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:26 +0100 Subject: [PATCH 128/239] firmware: arm_scmi: Cleanup the core driver removal callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform drivers .remove callbacks are not supposed to fail and report errors. Such errors are indeed ignored by the core platform drivers and the driver unbind process is anyway completed. The SCMI core platform driver as it is now, instead, bails out reporting an error in case of an explicit unbind request. Fix the removal path by adding proper device links between the core SCMI device and the SCMI protocol devices so that a full SCMI stack unbind is triggered when the core driver is removed. The remove process does not bail out anymore on the anomalous conditions triggered by an explicit unbind but the user is still warned. Reported-by: Uwe Kleine-König Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 11 +++++++++++ drivers/firmware/arm_scmi/common.h | 1 + drivers/firmware/arm_scmi/driver.c | 31 ++++++++++++++++++------------ 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index d4e23101448ae..35bb70724d44b 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -216,9 +216,20 @@ void scmi_device_destroy(struct scmi_device *scmi_dev) device_unregister(&scmi_dev->dev); } +void scmi_device_link_add(struct device *consumer, struct device *supplier) +{ + struct device_link *link; + + link = device_link_add(consumer, supplier, DL_FLAG_AUTOREMOVE_CONSUMER); + + WARN_ON(!link); +} + void scmi_set_handle(struct scmi_device *scmi_dev) { scmi_dev->handle = scmi_handle_get(&scmi_dev->dev); + if (scmi_dev->handle) + scmi_device_link_add(&scmi_dev->dev, scmi_dev->handle->dev); } int scmi_protocol_register(const struct scmi_protocol *proto) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 61aba7447c32a..9b87b5b695355 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -97,6 +97,7 @@ static inline void unpack_scmi_header(u32 msg_hdr, struct scmi_msg_hdr *hdr) struct scmi_revision_info * scmi_revision_area_get(const struct scmi_protocol_handle *ph); int scmi_handle_put(const struct scmi_handle *handle); +void scmi_device_link_add(struct device *consumer, struct device *supplier); struct scmi_handle *scmi_handle_get(struct device *dev); void scmi_set_handle(struct scmi_device *scmi_dev); void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph, diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 609ebedee9cb6..7e19b6055d759 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2273,10 +2273,16 @@ int scmi_protocol_device_request(const struct scmi_device_id *id_table) sdev = scmi_get_protocol_device(child, info, id_table->protocol_id, id_table->name); - /* Set handle if not already set: device existed */ - if (sdev && !sdev->handle) - sdev->handle = - scmi_handle_get_from_info_unlocked(info); + if (sdev) { + /* Set handle if not already set: device existed */ + if (!sdev->handle) + sdev->handle = + scmi_handle_get_from_info_unlocked(info); + /* Relink consumer and suppliers */ + if (sdev->handle) + scmi_device_link_add(&sdev->dev, + sdev->handle->dev); + } } else { dev_err(info->dev, "Failed. SCMI protocol %d not active.\n", @@ -2475,20 +2481,17 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id) static int scmi_remove(struct platform_device *pdev) { - int ret = 0, id; + int ret, id; struct scmi_info *info = platform_get_drvdata(pdev); struct device_node *child; mutex_lock(&scmi_list_mutex); if (info->users) - ret = -EBUSY; - else - list_del(&info->node); + dev_warn(&pdev->dev, + "Still active SCMI users will be forcibly unbound.\n"); + list_del(&info->node); mutex_unlock(&scmi_list_mutex); - if (ret) - return ret; - scmi_notification_exit(&info->handle); mutex_lock(&info->protocols_mtx); @@ -2500,7 +2503,11 @@ static int scmi_remove(struct platform_device *pdev) idr_destroy(&info->active_protocols); /* Safe to free channels since no more users */ - return scmi_cleanup_txrx_channels(info); + ret = scmi_cleanup_txrx_channels(info); + if (ret) + dev_warn(&pdev->dev, "Failed to cleanup SCMI channels.\n"); + + return 0; } static ssize_t protocol_version_show(struct device *dev, From fd96fbc8fad35d6b1872c90df8a2f5d721f14d91 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:27 +0100 Subject: [PATCH 129/239] firmware: arm_scmi: Suppress the driver's bind attributes Suppress the capability to unbind the core SCMI driver since all the SCMI stack protocol drivers depend on it. Fixes: aa4f886f3893 ("firmware: arm_scmi: add basic driver infrastructure for SCMI") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 7e19b6055d759..94be633b55a08 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2578,6 +2578,7 @@ MODULE_DEVICE_TABLE(of, scmi_of_match); static struct platform_driver scmi_driver = { .driver = { .name = "arm-scmi", + .suppress_bind_attrs = true, .of_match_table = scmi_of_match, .dev_groups = versions_groups, }, From 59172b212ec0dbb97ceb5671d912e6e61fa802d5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:28 +0100 Subject: [PATCH 130/239] firmware: arm_scmi: Make tx_prepare time out eventually SCMI transports based on shared memory, at start of transmissions, have to wait for the shared Tx channel area to be eventually freed by the SCMI platform before accessing the channel. In fact the channel is owned by the SCMI platform until marked as free by the platform itself and, as such, cannot be used by the agent until relinquished. As a consequence a badly misbehaving SCMI platform firmware could lock the channel indefinitely and make the kernel side SCMI stack loop forever waiting for such channel to be freed, possibly hanging the whole boot sequence. Add a timeout to the existent Tx waiting spin-loop so that, when the system ends up in this situation, the SCMI stack can at least bail-out, nosily warn the user, and abort the transmission. Reported-by: YaxiongTian Suggested-by: YaxiongTian Cc: Vincent Guittot Cc: Etienne Carriere Cc: Florian Fainelli Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-3-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 4 +++- drivers/firmware/arm_scmi/driver.c | 1 + drivers/firmware/arm_scmi/mailbox.c | 2 +- drivers/firmware/arm_scmi/optee.c | 2 +- drivers/firmware/arm_scmi/shmem.c | 31 +++++++++++++++++++++++++---- drivers/firmware/arm_scmi/smc.c | 2 +- 6 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 9b87b5b695355..a1c0154c31c6f 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -118,6 +118,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); * * @dev: Reference to device in the SCMI hierarchy corresponding to this * channel + * @rx_timeout_ms: The configured RX timeout in milliseconds. * @handle: Pointer to SCMI entity handle * @no_completion_irq: Flag to indicate that this channel has no completion * interrupt mechanism for synchronous commands. @@ -127,6 +128,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); */ struct scmi_chan_info { struct device *dev; + unsigned int rx_timeout_ms; struct scmi_handle *handle; bool no_completion_irq; void *transport_info; @@ -233,7 +235,7 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id); struct scmi_shared_mem; void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem, - struct scmi_xfer *xfer); + struct scmi_xfer *xfer, struct scmi_chan_info *cinfo); u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem); void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 94be633b55a08..985775f210f99 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2013,6 +2013,7 @@ static int scmi_chan_setup(struct scmi_info *info, struct device *dev, return -ENOMEM; cinfo->dev = dev; + cinfo->rx_timeout_ms = info->desc->max_rx_timeout_ms; ret = info->desc->ops->chan_setup(cinfo, info->dev, tx); if (ret) diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 08ff4d110beb4..1e40cb035044d 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -36,7 +36,7 @@ static void tx_prepare(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); - shmem_tx_prepare(smbox->shmem, m); + shmem_tx_prepare(smbox->shmem, m, smbox->cinfo); } static void rx_callback(struct mbox_client *cl, void *m) diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index f42dad997ac9a..2a7aeab40e543 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -498,7 +498,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo, msg_tx_prepare(channel->req.msg, xfer); ret = invoke_process_msg_channel(channel, msg_command_size(xfer)); } else { - shmem_tx_prepare(channel->req.shmem, xfer); + shmem_tx_prepare(channel->req.shmem, xfer, cinfo); ret = invoke_process_smt_channel(channel); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 0e3eaea5d8526..1dfe534b85184 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -5,10 +5,13 @@ * Copyright (C) 2019 ARM Ltd. */ +#include #include #include #include +#include + #include "common.h" /* @@ -30,16 +33,36 @@ struct scmi_shared_mem { }; void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem, - struct scmi_xfer *xfer) + struct scmi_xfer *xfer, struct scmi_chan_info *cinfo) { + ktime_t stop; + /* * Ideally channel must be free by now unless OS timeout last * request and platform continued to process the same, wait * until it releases the shared memory, otherwise we may endup - * overwriting its response with new message payload or vice-versa + * overwriting its response with new message payload or vice-versa. + * Giving up anyway after twice the expected channel timeout so as + * not to bail-out on intermittent issues where the platform is + * occasionally a bit slower to answer. + * + * Note that after a timeout is detected we bail-out and carry on but + * the transport functionality is probably permanently compromised: + * this is just to ease debugging and avoid complete hangs on boot + * due to a misbehaving SCMI firmware. */ - spin_until_cond(ioread32(&shmem->channel_status) & - SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); + stop = ktime_add_ms(ktime_get(), 2 * cinfo->rx_timeout_ms); + spin_until_cond((ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE) || + ktime_after(ktime_get(), stop)); + if (!(ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE)) { + WARN_ON_ONCE(1); + dev_err(cinfo->dev, + "Timeout waiting for a free TX channel !\n"); + return; + } + /* Mark channel busy + clear error */ iowrite32(0x0, &shmem->channel_status); iowrite32(xfer->hdr.poll_completion ? 0 : SCMI_SHMEM_FLAG_INTR_ENABLED, diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index 745acfdd0b3df..87a7b13cf868b 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -188,7 +188,7 @@ static int smc_send_message(struct scmi_chan_info *cinfo, */ smc_channel_lock_acquire(scmi_info, xfer); - shmem_tx_prepare(scmi_info->shmem, xfer); + shmem_tx_prepare(scmi_info->shmem, xfer, cinfo); arm_smccc_1_1_invoke(scmi_info->func_id, 0, 0, 0, 0, 0, 0, 0, &res); From be9ba1f7f9e0b565b19f4294f5871da9d654bc6d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:29 +0100 Subject: [PATCH 131/239] firmware: arm_scmi: Make Rx chan_setup fail on memory errors SCMI Rx channels are optional and they can fail to be setup when not present but anyway channels setup routines must bail-out on memory errors. Make channels setup, and related probing, fail when memory errors are reported on Rx channels. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 985775f210f99..f818d00bb2c69 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2045,8 +2045,12 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id) { int ret = scmi_chan_setup(info, dev, prot_id, true); - if (!ret) /* Rx is optional, hence no error check */ - scmi_chan_setup(info, dev, prot_id, false); + if (!ret) { + /* Rx is optional, report only memory errors */ + ret = scmi_chan_setup(info, dev, prot_id, false); + if (ret && ret != -ENOMEM) + ret = 0; + } return ret; } From 5ffc1c4cb896f8d2cf10309422da3633a616d60f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:30 +0100 Subject: [PATCH 132/239] firmware: arm_scmi: Fix devres allocation device in virtio transport SCMI virtio transport device managed allocations must use the main platform device in devres operations instead of the channel devices. Cc: Peter Hilber Fixes: 46abe13b5e3d ("firmware: arm_scmi: Add virtio transport") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-5-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 14709dbc96a1a..36b7686843a42 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -444,12 +444,12 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, for (i = 0; i < vioch->max_msg; i++) { struct scmi_vio_msg *msg; - msg = devm_kzalloc(cinfo->dev, sizeof(*msg), GFP_KERNEL); + msg = devm_kzalloc(dev, sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; if (tx) { - msg->request = devm_kzalloc(cinfo->dev, + msg->request = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE, GFP_KERNEL); if (!msg->request) @@ -458,7 +458,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, refcount_set(&msg->users, 1); } - msg->input = devm_kzalloc(cinfo->dev, VIRTIO_SCMI_MAX_PDU_SIZE, + msg->input = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE, GFP_KERNEL); if (!msg->input) return -ENOMEM; From 1eff6929aff594fba3182660f7b6213ec0ceda0c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:31 +0100 Subject: [PATCH 133/239] firmware: arm_scmi: Fix deferred_tx_wq release on error paths Use devres to allocate the dedicated deferred_tx_wq polling workqueue so as to automatically trigger the proper resource release on error path. Reported-by: Dan Carpenter Fixes: 5a3b7185c47c ("firmware: arm_scmi: Add atomic mode support to virtio transport") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-6-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 36b7686843a42..33c9b81a55cd1 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -148,7 +148,6 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch) { unsigned long flags; DECLARE_COMPLETION_ONSTACK(vioch_shutdown_done); - void *deferred_wq = NULL; /* * Prepare to wait for the last release if not already released @@ -162,16 +161,11 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch) vioch->shutdown_done = &vioch_shutdown_done; virtio_break_device(vioch->vqueue->vdev); - if (!vioch->is_rx && vioch->deferred_tx_wq) { - deferred_wq = vioch->deferred_tx_wq; + if (!vioch->is_rx && vioch->deferred_tx_wq) /* Cannot be kicked anymore after this...*/ vioch->deferred_tx_wq = NULL; - } spin_unlock_irqrestore(&vioch->lock, flags); - if (deferred_wq) - destroy_workqueue(deferred_wq); - scmi_vio_channel_release(vioch); /* Let any possibly concurrent RX path release the channel */ @@ -416,6 +410,11 @@ static bool virtio_chan_available(struct device *dev, int idx) return vioch && !vioch->cinfo; } +static void scmi_destroy_tx_workqueue(void *deferred_tx_wq) +{ + destroy_workqueue(deferred_tx_wq); +} + static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, bool tx) { @@ -430,6 +429,8 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, /* Setup a deferred worker for polling. */ if (tx && !vioch->deferred_tx_wq) { + int ret; + vioch->deferred_tx_wq = alloc_workqueue(dev_name(&scmi_vdev->dev), WQ_UNBOUND | WQ_FREEZABLE | WQ_SYSFS, @@ -437,6 +438,11 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, if (!vioch->deferred_tx_wq) return -ENOMEM; + ret = devm_add_action_or_reset(dev, scmi_destroy_tx_workqueue, + vioch->deferred_tx_wq); + if (ret) + return ret; + INIT_WORK(&vioch->deferred_tx_work, scmi_vio_deferred_tx_worker); } From c4a7b9b587ca1bb4678d48d8be7132492b23a81c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:33 +0100 Subject: [PATCH 134/239] arm64: dts: juno: Add thermal critical trip points When thermnal zones are defined, trip points definitions are mandatory. Define a couple of critical trip points for monitoring of existing PMIC and SOC thermal zones. This was lost between txt to yaml conversion and was re-enforced recently via the commit 8c596324232d ("dt-bindings: thermal: Fix missing required property") Cc: Rob Herring Cc: Krzysztof Kozlowski Cc: devicetree@vger.kernel.org Signed-off-by: Cristian Marussi Fixes: f7b636a8d83c ("arm64: dts: juno: add thermal zones for scpi sensors") Link: https://lore.kernel.org/r/20221028140833.280091-8-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/juno-base.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 2f27619d8abd5..8b4d280b1e7e7 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -751,12 +751,26 @@ polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 0>; + trips { + pmic_crit0: trip0 { + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; soc { polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 3>; + trips { + soc_crit0: trip0 { + temperature = <80000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; big_cluster_thermal_zone: big-cluster { From 569bea74c94d37785682b11bab76f557520477cd Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 27 Oct 2022 20:13:53 +0800 Subject: [PATCH 135/239] i2c: piix4: Fix adapter not be removed in piix4_remove() In piix4_probe(), the piix4 adapter will be registered in: piix4_probe() piix4_add_adapters_sb800() / piix4_add_adapter() i2c_add_adapter() Based on the probed device type, piix4_add_adapters_sb800() or single piix4_add_adapter() will be called. For the former case, piix4_adapter_count is set as the number of adapters, while for antoher case it is not set and kept default *zero*. When piix4 is removed, piix4_remove() removes the adapters added in piix4_probe(), basing on the piix4_adapter_count value. Because the count is zero for the single adapter case, the adapter won't be removed and makes the sources allocated for adapter leaked, such as the i2c client and device. These sources can still be accessed by i2c or bus and cause problems. An easily reproduced case is that if a new adapter is registered, i2c will get the leaked adapter and try to call smbus_algorithm, which was already freed: Triggered by: rmmod i2c_piix4 && modprobe max31730 BUG: unable to handle page fault for address: ffffffffc053d860 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3752 Comm: modprobe Tainted: G Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:i2c_default_probe (drivers/i2c/i2c-core-base.c:2259) i2c_core RSP: 0018:ffff888107477710 EFLAGS: 00000246 ... i2c_detect (drivers/i2c/i2c-core-base.c:2302) i2c_core __process_new_driver (drivers/i2c/i2c-core-base.c:1336) i2c_core bus_for_each_dev (drivers/base/bus.c:301) i2c_for_each_dev (drivers/i2c/i2c-core-base.c:1823) i2c_core i2c_register_driver (drivers/i2c/i2c-core-base.c:1861) i2c_core do_one_initcall (init/main.c:1296) do_init_module (kernel/module/main.c:2455) ... ---[ end trace 0000000000000000 ]--- Fix this problem by correctly set piix4_adapter_count as 1 for the single adapter so it can be normally removed. Fixes: 528d53a1592b ("i2c: piix4: Fix probing of reserved ports on AMD Family 16h Model 30h") Signed-off-by: Chen Zhongjin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 39cb1b7bb8656..809fbd014cd68 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -1080,6 +1080,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) "", &piix4_main_adapters[0]); if (retval < 0) return retval; + piix4_adapter_count = 1; } /* Check for auxiliary SMBus on some AMD chipsets */ From cdbf26251d3b35c4ccaea0c3a6de4318f727d3d2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 20 Oct 2022 16:39:33 +0200 Subject: [PATCH 136/239] i2c: tegra: Allocate DMA memory for DMA engine When the I2C controllers are running in DMA mode, it is the DMA engine that performs the memory accesses rather than the I2C controller. Pass the DMA engine's struct device pointer to the DMA API to make sure the correct DMA operations are used. This fixes an issue where the DMA engine's SMMU stream ID needs to be misleadingly set for the I2C controllers in device tree. Suggested-by: Robin Murphy Signed-off-by: Thierry Reding Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 954022c04cc42..3869c258a5296 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -284,6 +284,7 @@ struct tegra_i2c_dev { struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; unsigned int dma_buf_size; + struct device *dma_dev; dma_addr_t dma_phys; void *dma_buf; @@ -420,7 +421,7 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) { if (i2c_dev->dma_buf) { - dma_free_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_free_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, i2c_dev->dma_buf, i2c_dev->dma_phys); i2c_dev->dma_buf = NULL; } @@ -472,10 +473,13 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->tx_dma_chan = chan; + WARN_ON(i2c_dev->tx_dma_chan->device != i2c_dev->rx_dma_chan->device); + i2c_dev->dma_dev = chan->device->dev; + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + I2C_PACKET_HEADER_SIZE; - dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_buf = dma_alloc_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); @@ -1272,7 +1276,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); @@ -1280,7 +1284,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (err) return err; } else { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); } @@ -1293,7 +1297,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, msg->len); - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); @@ -1344,7 +1348,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); From d6643d7207c572c1b0305ed505101f15502c6c87 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 6 Oct 2022 16:54:40 +0200 Subject: [PATCH 137/239] i2c: i801: add lis3lv02d's I2C address for Vostro 5568 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dell Vostro 5568 laptop has lis3lv02d, but its i2c address is not known to the kernel. Add this address. Output of "cat /sys/devices/platform/lis3lv02d/position" on Dell Vostro 5568 laptop: - Horizontal: (-18,0,1044) - Front elevated: (522,-18,1080) - Left elevated: (-18,-360,1080) - Upside down: (36,108,-1134) Signed-off-by: Nam Cao Reviewed-by: Jean Delvare Reviewed-by: Pali Rohár Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e06509edc5f39..1fda1eaa6d6ab 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1243,6 +1243,7 @@ static const struct { */ { "Latitude 5480", 0x29 }, { "Vostro V131", 0x1d }, + { "Vostro 5568", 0x29 }, }; static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) From ff52fe006fdeacee49745dabed26154db52a6343 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Sun, 9 Oct 2022 10:35:27 +0800 Subject: [PATCH 138/239] drm/imx: Kconfig: Remove duplicated 'select DRM_KMS_HELPER' line A duplicated line 'select DRM_KMS_HELPER' was introduced in Kconfig file by commit 09717af7d13d ("drm: Remove CONFIG_DRM_KMS_CMA_HELPER option"), so remove it. Fixes: 09717af7d13d ("drm: Remove CONFIG_DRM_KMS_CMA_HELPER option") Signed-off-by: Liu Ying Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20221009023527.3669647-1-victor.liu@nxp.com --- drivers/gpu/drm/imx/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig index 975de4ff7313c..fd5b2471fdf0a 100644 --- a/drivers/gpu/drm/imx/Kconfig +++ b/drivers/gpu/drm/imx/Kconfig @@ -4,7 +4,6 @@ config DRM_IMX select DRM_KMS_HELPER select VIDEOMODE_HELPERS select DRM_GEM_DMA_HELPER - select DRM_KMS_HELPER depends on DRM && (ARCH_MXC || ARCH_MULTIPLATFORM || COMPILE_TEST) depends on IMX_IPUV3_CORE help From fc007fb815ab5395c3962c09b79a1630b0fbed9c Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Tue, 13 Sep 2022 13:55:44 -0700 Subject: [PATCH 139/239] drm/imx: imx-tve: Fix return type of imx_tve_connector_mode_valid The mode_valid field in drm_connector_helper_funcs is expected to be of type: enum drm_mode_status (* mode_valid) (struct drm_connector *connector, struct drm_display_mode *mode); The mismatched return type breaks forward edge kCFI since the underlying function definition does not match the function hook definition. The return type of imx_tve_connector_mode_valid should be changed from int to enum drm_mode_status. Reported-by: Dan Carpenter Link: https://github.com/ClangBuiltLinux/linux/issues/1703 Cc: llvm@lists.linux.dev Signed-off-by: Nathan Huckleberry Reviewed-by: Nathan Chancellor Reviewed-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20220913205544.155106-1-nhuck@google.com --- drivers/gpu/drm/imx/imx-tve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index 6b34fac3f73a0..ab4d1c878fda3 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -218,8 +218,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector) return ret; } -static int imx_tve_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +imx_tve_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct imx_tve *tve = con_to_tve(connector); unsigned long rate; From 6c412da54c80a54b1a8b7f89677f6e82f0fabec4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 29 Oct 2022 22:57:11 +0200 Subject: [PATCH 140/239] sfc: Fix an error handling path in efx_pci_probe() If an error occurs after the first kzalloc() the corresponding memory allocation is never freed. Add the missing kfree() in the error handling path, as already done in the remove() function. Fixes: 7e773594dada ("sfc: Separate efx_nic memory from net_device memory") Signed-off-by: Christophe JAILLET Acked-by: Martin Habets Link: https://lore.kernel.org/r/dc114193121c52c8fa3779e49bdd99d4b41344a9.1667077009.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 054d5ce6029e4..0556542d7a6b6 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1059,8 +1059,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, /* Allocate and initialise a struct net_device */ net_dev = alloc_etherdev_mq(sizeof(probe_data), EFX_MAX_CORE_TX_QUEUES); - if (!net_dev) - return -ENOMEM; + if (!net_dev) { + rc = -ENOMEM; + goto fail0; + } probe_ptr = netdev_priv(net_dev); *probe_ptr = probe_data; efx->net_dev = net_dev; @@ -1132,6 +1134,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, WARN_ON(rc > 0); netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc); free_netdev(net_dev); + fail0: + kfree(probe_data); return rc; } From a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 28 Oct 2022 17:12:19 +0300 Subject: [PATCH 141/239] x86/tdx: Prepare for using "INFO" call for a second purpose The TDG.VP.INFO TDCALL provides the guest with various details about the TDX system that the guest needs to run. Only one field is currently used: 'gpa_width' which tells the guest which PTE bits mark pages shared or private. A second field is now needed: the guest "TD attributes" to tell if virtualization exceptions are configured in a way that can harm the guest. Make the naming and calling convention more generic and discrete from the mask-centric one. Thanks to Sathya for the inspiration here, but there's no code, comments or changelogs left from where he started. Signed-off-by: Dave Hansen Acked-by: Kirill A. Shutemov Tested-by: Kirill A. Shutemov Cc: stable@vger.kernel.org --- arch/x86/coco/tdx/tdx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 928dcf7a20d98..3fee96931ff56 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -98,7 +98,7 @@ static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); } -static u64 get_cc_mask(void) +static void tdx_parse_tdinfo(u64 *cc_mask) { struct tdx_module_output out; unsigned int gpa_width; @@ -121,7 +121,7 @@ static u64 get_cc_mask(void) * The highest bit of a guest physical address is the "sharing" bit. * Set it for shared pages and clear it for private pages. */ - return BIT_ULL(gpa_width - 1); + *cc_mask = BIT_ULL(gpa_width - 1); } /* @@ -758,7 +758,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); cc_set_vendor(CC_VENDOR_INTEL); - cc_mask = get_cc_mask(); + tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); /* From 024f4b2e1f874934943eb2d3d288ebc52c79f55c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Oct 2022 10:01:57 +0100 Subject: [PATCH 142/239] arm64: entry: avoid kprobe recursion The cortex_a76_erratum_1463225_debug_handler() function is called when handling debug exceptions (and synchronous exceptions from BRK instructions), and so is called when a probed function executes. If the compiler does not inline cortex_a76_erratum_1463225_debug_handler(), it can be probed. If cortex_a76_erratum_1463225_debug_handler() is probed, any debug exception or software breakpoint exception will result in recursive exceptions leading to a stack overflow. This can be triggered with the ftrace multiple_probes selftest, and as per the example splat below. This is a regression caused by commit: 6459b8469753e9fe ("arm64: entry: consolidate Cortex-A76 erratum 1463225 workaround") ... which removed the NOKPROBE_SYMBOL() annotation associated with the function. My intent was that cortex_a76_erratum_1463225_debug_handler() would be inlined into its caller, el1_dbg(), which is marked noinstr and cannot be probed. Mark cortex_a76_erratum_1463225_debug_handler() as __always_inline to ensure this. Example splat prior to this patch (with recursive entries elided): | # echo p cortex_a76_erratum_1463225_debug_handler > /sys/kernel/debug/tracing/kprobe_events | # echo p do_el0_svc >> /sys/kernel/debug/tracing/kprobe_events | # echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable | Insufficient stack space to handle exception! | ESR: 0x0000000096000047 -- DABT (current EL) | FAR: 0xffff800009cefff0 | Task stack: [0xffff800009cf0000..0xffff800009cf4000] | IRQ stack: [0xffff800008000000..0xffff800008004000] | Overflow stack: [0xffff00007fbc00f0..0xffff00007fbc10f0] | CPU: 0 PID: 145 Comm: sh Not tainted 6.0.0 #2 | Hardware name: linux,dummy-virt (DT) | pstate: 604003c5 (nZCv DAIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : arm64_enter_el1_dbg+0x4/0x20 | lr : el1_dbg+0x24/0x5c | sp : ffff800009cf0000 | x29: ffff800009cf0000 x28: ffff000002c74740 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 00000000604003c5 x22: ffff80000801745c x21: 0000aaaac95ac068 | x20: 00000000f2000004 x19: ffff800009cf0040 x18: 0000000000000000 | x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 | x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 | x11: 0000000000000010 x10: ffff800008c87190 x9 : ffff800008ca00d0 | x8 : 000000000000003c x7 : 0000000000000000 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000000043a4 | x2 : 00000000f2000004 x1 : 00000000f2000004 x0 : ffff800009cf0040 | Kernel panic - not syncing: kernel stack overflow | CPU: 0 PID: 145 Comm: sh Not tainted 6.0.0 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0xe4/0x104 | show_stack+0x18/0x4c | dump_stack_lvl+0x64/0x7c | dump_stack+0x18/0x38 | panic+0x14c/0x338 | test_taint+0x0/0x2c | panic_bad_stack+0x104/0x118 | handle_bad_stack+0x34/0x48 | __bad_stack+0x78/0x7c | arm64_enter_el1_dbg+0x4/0x20 | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 ... | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 ... | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | do_el0_svc+0x0/0x28 | el0t_64_sync_handler+0x84/0xf0 | el0t_64_sync+0x18c/0x190 | Kernel Offset: disabled | CPU features: 0x0080,00005021,19001080 | Memory Limit: none | ---[ end Kernel panic - not syncing: kernel stack overflow ]--- With this patch, cortex_a76_erratum_1463225_debug_handler() is inlined into el1_dbg(), and el1_dbg() cannot be probed: | # echo p cortex_a76_erratum_1463225_debug_handler > /sys/kernel/debug/tracing/kprobe_events | sh: write error: No such file or directory | # grep -w cortex_a76_erratum_1463225_debug_handler /proc/kallsyms | wc -l | 0 | # echo p el1_dbg > /sys/kernel/debug/tracing/kprobe_events | sh: write error: Invalid argument | # grep -w el1_dbg /proc/kallsyms | wc -l | 1 Fixes: 6459b8469753 ("arm64: entry: consolidate Cortex-A76 erratum 1463225 workaround") Cc: # 5.12.x Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20221017090157.2881408-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9173fad279af9..27369fa1c032c 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -329,7 +329,8 @@ static void cortex_a76_erratum_1463225_svc_handler(void) __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); } -static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static __always_inline bool +cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) return false; From 71ee71d7adcba648077997a29a91158d20c40b09 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 1 Nov 2022 01:41:00 -0600 Subject: [PATCH 143/239] cxl/region: Fix decoder allocation crash When an intermediate port's decoders have been exhausted by existing regions, and creating a new region with the port in question in it's hierarchical path is attempted, cxl_port_attach_region() fails to find a port decoder (as would be expected), and drops into the failure / cleanup path. However, during cleanup of the region reference, a sanity check attempts to dereference the decoder, which in the above case didn't exist. This causes a NULL pointer dereference BUG. To fix this, refactor the decoder allocation and de-allocation into helper routines, and in this 'free' routine, check that the decoder, @cxld, is valid before attempting any operations on it. Cc: Suggested-by: Dan Williams Signed-off-by: Vishal Verma Reviewed-by: Dave Jiang Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Link: https://lore.kernel.org/r/20221101074100.1732003-1-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c49d9a5f1091c..bb6f4fc84a3ff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -687,18 +687,27 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, return cxl_rr; } -static void free_region_ref(struct cxl_region_ref *cxl_rr) +static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr) { - struct cxl_port *port = cxl_rr->port; struct cxl_region *cxlr = cxl_rr->region; struct cxl_decoder *cxld = cxl_rr->decoder; + if (!cxld) + return; + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); if (cxld->region == cxlr) { cxld->region = NULL; put_device(&cxlr->dev); } +} +static void free_region_ref(struct cxl_region_ref *cxl_rr) +{ + struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + + cxl_rr_free_decoder(cxl_rr); xa_erase(&port->regions, (unsigned long)cxlr); xa_destroy(&cxl_rr->endpoints); kfree(cxl_rr); @@ -729,6 +738,33 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, return 0; } +static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + struct cxl_region_ref *cxl_rr) +{ + struct cxl_decoder *cxld; + + if (port == cxled_to_port(cxled)) + cxld = &cxled->cxld; + else + cxld = cxl_region_find_decoder(port, cxlr); + if (!cxld) { + dev_dbg(&cxlr->dev, "%s: no decoder available\n", + dev_name(&port->dev)); + return -EBUSY; + } + + if (cxld->region) { + dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", + dev_name(&port->dev), dev_name(&cxld->dev), + dev_name(&cxld->region->dev)); + return -EBUSY; + } + + cxl_rr->decoder = cxld; + return 0; +} + /** * cxl_port_attach_region() - track a region's interest in a port by endpoint * @port: port to add a new region reference 'struct cxl_region_ref' @@ -795,12 +831,6 @@ static int cxl_port_attach_region(struct cxl_port *port, cxl_rr->nr_targets++; nr_targets_inc = true; } - - /* - * The decoder for @cxlr was allocated when the region was first - * attached to @port. - */ - cxld = cxl_rr->decoder; } else { cxl_rr = alloc_region_ref(port, cxlr); if (IS_ERR(cxl_rr)) { @@ -811,26 +841,11 @@ static int cxl_port_attach_region(struct cxl_port *port, } nr_targets_inc = true; - if (port == cxled_to_port(cxled)) - cxld = &cxled->cxld; - else - cxld = cxl_region_find_decoder(port, cxlr); - if (!cxld) { - dev_dbg(&cxlr->dev, "%s: no decoder available\n", - dev_name(&port->dev)); - goto out_erase; - } - - if (cxld->region) { - dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", - dev_name(&port->dev), dev_name(&cxld->dev), - dev_name(&cxld->region->dev)); - rc = -EBUSY; + rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr); + if (rc) goto out_erase; - } - - cxl_rr->decoder = cxld; } + cxld = cxl_rr->decoder; rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { From 373e715e31bf4e0f129befe87613a278fac228d3 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 28 Oct 2022 17:12:20 +0300 Subject: [PATCH 144/239] x86/tdx: Panic on bad configs that #VE on "private" memory access All normal kernel memory is "TDX private memory". This includes everything from kernel stacks to kernel text. Handling exceptions on arbitrary accesses to kernel memory is essentially impossible because they can happen in horribly nasty places like kernel entry/exit. But, TDX hardware can theoretically _deliver_ a virtualization exception (#VE) on any access to private memory. But, it's not as bad as it sounds. TDX can be configured to never deliver these exceptions on private memory with a "TD attribute" called ATTR_SEPT_VE_DISABLE. The guest has no way to *set* this attribute, but it can check it. Ensure ATTR_SEPT_VE_DISABLE is set in early boot. panic() if it is unset. There is no sane way for Linux to run with this attribute clear so a panic() is appropriate. There's small window during boot before the check where kernel has an early #VE handler. But the handler is only for port I/O and will also panic() as soon as it sees any other #VE, such as a one generated by a private memory access. [ dhansen: Rewrite changelog and rebase on new tdx_parse_tdinfo(). Add Kirill's tested-by because I made changes since he wrote this. ] Fixes: 9a22bf6debbf ("x86/traps: Add #VE support for TDX guest") Reported-by: ruogui.ygr@alibaba-inc.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Tested-by: Kirill A. Shutemov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20221028141220.29217-3-kirill.shutemov%40linux.intel.com --- arch/x86/coco/tdx/tdx.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 3fee96931ff56..b8998cf0508a6 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -34,6 +34,8 @@ #define VE_GET_PORT_NUM(e) ((e) >> 16) #define VE_IS_IO_STRING(e) ((e) & BIT(4)) +#define ATTR_SEPT_VE_DISABLE BIT(28) + /* * Wrapper for standard use of __tdx_hypercall with no output aside from * return code. @@ -102,6 +104,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) { struct tdx_module_output out; unsigned int gpa_width; + u64 td_attr; /* * TDINFO TDX module call is used to get the TD execution environment @@ -109,19 +112,27 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * information, etc. More details about the ABI can be found in TDX * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL * [TDG.VP.INFO]. - * - * The GPA width that comes out of this call is critical. TDX guests - * can not meaningfully run without it. */ tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out); - gpa_width = out.rcx & GENMASK(5, 0); - /* * The highest bit of a guest physical address is the "sharing" bit. * Set it for shared pages and clear it for private pages. + * + * The GPA width that comes out of this call is critical. TDX guests + * can not meaningfully run without it. */ + gpa_width = out.rcx & GENMASK(5, 0); *cc_mask = BIT_ULL(gpa_width - 1); + + /* + * The kernel can not handle #VE's when accessing normal kernel + * memory. Ensure that no #VE will be delivered for accesses to + * TD-private memory. Only VMM-shared memory (MMIO) will #VE. + */ + td_attr = out.rdx; + if (!(td_attr & ATTR_SEPT_VE_DISABLE)) + panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n"); } /* From 486c292230166c2d61701d3c984bf9143588ea28 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:34 +0100 Subject: [PATCH 145/239] net: lan966x: Fix the MTU calculation When the MTU was changed, the lan966x didn't take in consideration the L2 header and the FCS. So the HW was configured with a smaller value than what was desired. Therefore the correct value to configure the HW would be new_mtu + ETH_HLEN + ETH_FCS_LEN. The vlan tag is not considered here, because at the time when the blamed commit was added, there was no vlan filtering support. The vlan fix will be part of the next patch. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index be2fd030cccbe..b3070c3fcad0a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -386,7 +386,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) int old_mtu = dev->mtu; int err; - lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(new_mtu), + lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(LAN966X_HW_MTU(new_mtu)), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); dev->mtu = new_mtu; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 9656071b8289e..4ec33999e4df6 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -26,6 +26,8 @@ #define LAN966X_BUFFER_MEMORY (160 * 1024) #define LAN966X_BUFFER_MIN_SZ 60 +#define LAN966X_HW_MTU(mtu) ((mtu) + ETH_HLEN + ETH_FCS_LEN) + #define PGID_AGGR 64 #define PGID_SRC 80 #define PGID_ENTRIES 89 From 25f28bb1b4a7717a9df3aa574d210374ebb6bb23 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:35 +0100 Subject: [PATCH 146/239] net: lan966x: Adjust maximum frame size when vlan is enabled/disabled When vlan filtering is enabled/disabled, it is required to adjust the maximum received frame size that it can received. When vlan filtering is enabled, it would all to receive extra 4 bytes, that are the vlan tag. So the maximum frame size would be 1522 with a vlan tag. If vlan filtering is disabled then the maximum frame size would be 1518 regardless if there is or not a vlan tag. Fixes: 6d2c186afa5d ("net: lan966x: Add vlan support.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan966x/lan966x_regs.h | 15 +++++++++++++++ .../net/ethernet/microchip/lan966x/lan966x_vlan.c | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 1d90b93dd417a..fb5087fef22e1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -585,6 +585,21 @@ enum lan966x_target { #define DEV_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\ FIELD_GET(DEV_MAC_MAXLEN_CFG_MAX_LEN, x) +/* DEV:MAC_CFG_STATUS:MAC_TAGS_CFG */ +#define DEV_MAC_TAGS_CFG(t) __REG(TARGET_DEV, t, 8, 28, 0, 1, 44, 12, 0, 1, 4) + +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_SET(x)\ + FIELD_PREP(DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, x) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_GET(x)\ + FIELD_GET(DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, x) + +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(x)\ + FIELD_PREP(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA, x) +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_GET(x)\ + FIELD_GET(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA, x) + /* DEV:MAC_CFG_STATUS:MAC_IFG_CFG */ #define DEV_MAC_IFG_CFG(t) __REG(TARGET_DEV, t, 8, 28, 0, 1, 44, 20, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c index 8d7260cd7da9c..3c44660128dae 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -169,6 +169,12 @@ void lan966x_vlan_port_apply(struct lan966x_port *port) ANA_VLAN_CFG_VLAN_POP_CNT, lan966x, ANA_VLAN_CFG(port->chip_port)); + lan_rmw(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(port->vlan_aware) | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_SET(port->vlan_aware), + DEV_MAC_TAGS_CFG_VLAN_AWR_ENA | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, + lan966x, DEV_MAC_TAGS_CFG(port->chip_port)); + /* Drop frames with multicast source address */ val = ANA_DROP_CFG_DROP_MC_SMAC_ENA_SET(1); if (port->vlan_aware && !pvid) From 872ad758f9b7fb4eb42aebaf64e50c5b29b7ffe5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:36 +0100 Subject: [PATCH 147/239] net: lan966x: Fix FDMA when MTU is changed When MTU is changed, FDMA is required to calculate what is the maximum size of the frame that it can received. So it can calculate what is the page order needed to allocate for the received frames. The first problem was that, when the max MTU was calculated it was reading the value from dev and not from HW, so in this way it was missing L2 header + the FCS. The other problem was that once the skb is created using __build_skb_around, it would reserve some space for skb_shared_info. So if we received a frame which size is at the limit of the page order then the creating will failed because it would not have space to put all the data. Fixes: 2ea1cbac267e ("net: lan966x: Update FDMA to change MTU.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 8 ++++++-- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index a42035cec611c..c235edd2b182a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -668,12 +668,14 @@ static int lan966x_fdma_get_max_mtu(struct lan966x *lan966x) int i; for (i = 0; i < lan966x->num_phys_ports; ++i) { + struct lan966x_port *port; int mtu; - if (!lan966x->ports[i]) + port = lan966x->ports[i]; + if (!port) continue; - mtu = lan966x->ports[i]->dev->mtu; + mtu = lan_rd(lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); if (mtu > max_mtu) max_mtu = mtu; } @@ -733,6 +735,8 @@ int lan966x_fdma_change_mtu(struct lan966x *lan966x) max_mtu = lan966x_fdma_get_max_mtu(lan966x); max_mtu += IFH_LEN * sizeof(u32); + max_mtu += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + max_mtu += VLAN_HLEN * 2; if (round_up(max_mtu, PAGE_SIZE) / PAGE_SIZE - 1 == lan966x->rx.page_order) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index b3070c3fcad0a..20ee5b28f70a5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -395,7 +395,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) err = lan966x_fdma_change_mtu(lan966x); if (err) { - lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(old_mtu), + lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(LAN966X_HW_MTU(old_mtu)), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); dev->mtu = old_mtu; } From fc57062f98b0b0ae52bc584d8fd5ac77c50df607 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 31 Oct 2022 14:34:21 +0100 Subject: [PATCH 148/239] net: lan966x: Fix unmapping of received frames using FDMA When lan966x was receiving a frame, then it was building the skb and after that it was calling dma_unmap_single with frame size as the length. This actually has 2 issues: 1. It is using a length to map and a different length to unmap. 2. When the unmap was happening, the data was sync for cpu but it could be that this will overwrite what build_skb was initializing. The fix for these two problems is to change the order of operations. First to sync the frame for cpu, then to build the skb and in the end to unmap using the correct size but without sync the frame again for cpu. Fixes: c8349639324a ("net: lan966x: Add FDMA functionality") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20221031133421.1283196-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/lan966x/lan966x_fdma.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index c235edd2b182a..e6948939ccc2b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -414,13 +414,15 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) /* Get the received frame and unmap it */ db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; page = rx->page[rx->dcb_index][rx->db_index]; + + dma_sync_single_for_cpu(lan966x->dev, (dma_addr_t)db->dataptr, + FDMA_DCB_STATUS_BLOCKL(db->status), + DMA_FROM_DEVICE); + skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order); if (unlikely(!skb)) goto unmap_page; - dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr, - FDMA_DCB_STATUS_BLOCKL(db->status), - DMA_FROM_DEVICE); skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status)); lan966x_ifh_get_src_port(skb->data, &src_port); @@ -429,6 +431,10 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) if (WARN_ON(src_port >= lan966x->num_phys_ports)) goto free_skb; + dma_unmap_single_attrs(lan966x->dev, (dma_addr_t)db->dataptr, + PAGE_SIZE << rx->page_order, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + skb->dev = lan966x->ports[src_port]->dev; skb_pull(skb, IFH_LEN * sizeof(u32)); @@ -454,9 +460,9 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) free_skb: kfree_skb(skb); unmap_page: - dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr, - FDMA_DCB_STATUS_BLOCKL(db->status), - DMA_FROM_DEVICE); + dma_unmap_single_attrs(lan966x->dev, (dma_addr_t)db->dataptr, + PAGE_SIZE << rx->page_order, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); __free_pages(page, rx->page_order); return NULL; From ecaf75ffd5f5db320d8b1da0198eef5a5ce64a3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 31 Oct 2022 13:34:07 +0100 Subject: [PATCH 149/239] netlink: introduce bigendian integer types Jakub reported that the addition of the "network_byte_order" member in struct nla_policy increases size of 32bit platforms. Instead of scraping the bit from elsewhere Johannes suggested to add explicit NLA_BE types instead, so do this here. NLA_POLICY_MAX_BE() macro is removed again, there is no need for it: NLA_POLICY_MAX(NLA_BE.., ..) will do the right thing. NLA_BE64 can be added later. Fixes: 08724ef69907 ("netlink: introduce NLA_POLICY_MAX_BE") Reported-by: Jakub Kicinski Suggested-by: Johannes Berg Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20221031123407.9158-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 19 +++++++++-------- lib/nlattr.c | 41 ++++++++++++++----------------------- net/netfilter/nft_payload.c | 6 +++--- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 7db13b3261fc2..6bfa972f2fbf2 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -181,6 +181,8 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, + NLA_BE16, + NLA_BE32, __NLA_TYPE_MAX, }; @@ -231,6 +233,7 @@ enum nla_policy_validation { * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, * NLA_S32, NLA_S64, + * NLA_BE16, NLA_BE32, * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" @@ -261,6 +264,8 @@ enum nla_policy_validation { * NLA_U16, * NLA_U32, * NLA_U64, + * NLA_BE16, + * NLA_BE32, * NLA_S8, * NLA_S16, * NLA_S32, @@ -349,7 +354,6 @@ struct nla_policy { struct netlink_range_validation_signed *range_signed; struct { s16 min, max; - u8 network_byte_order:1; }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); @@ -374,6 +378,8 @@ struct nla_policy { (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) #define __NLA_IS_SINT_TYPE(tp) \ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) +#define __NLA_IS_BEINT_TYPE(tp) \ + (tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -387,6 +393,7 @@ struct nla_policy { #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ __NLA_IS_SINT_TYPE(tp) || \ + __NLA_IS_BEINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ @@ -394,6 +401,8 @@ struct nla_policy { tp != NLA_REJECT && \ tp != NLA_NESTED && \ tp != NLA_NESTED_ARRAY) + tp) +#define NLA_ENSURE_BEINT_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ @@ -424,14 +433,6 @@ struct nla_policy { .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ - .network_byte_order = 0, \ -} - -#define NLA_POLICY_MAX_BE(tp, _max) { \ - .type = NLA_ENSURE_UINT_TYPE(tp), \ - .validation_type = NLA_VALIDATE_MAX, \ - .max = _max, \ - .network_byte_order = 1, \ } #define NLA_POLICY_MASK(tp, _mask) { \ diff --git a/lib/nlattr.c b/lib/nlattr.c index 40f22b177d690..b67a53e29b8fe 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -124,10 +124,12 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U8_MAX; break; case NLA_U16: + case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: + case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: @@ -159,31 +161,6 @@ void nla_get_range_unsigned(const struct nla_policy *pt, } } -static u64 nla_get_attr_bo(const struct nla_policy *pt, - const struct nlattr *nla) -{ - switch (pt->type) { - case NLA_U16: - if (pt->network_byte_order) - return ntohs(nla_get_be16(nla)); - - return nla_get_u16(nla); - case NLA_U32: - if (pt->network_byte_order) - return ntohl(nla_get_be32(nla)); - - return nla_get_u32(nla); - case NLA_U64: - if (pt->network_byte_order) - return be64_to_cpu(nla_get_be64(nla)); - - return nla_get_u64(nla); - } - - WARN_ON_ONCE(1); - return 0; -} - static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, @@ -197,9 +174,13 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, value = nla_get_u8(nla); break; case NLA_U16: + value = nla_get_u16(nla); + break; case NLA_U32: + value = nla_get_u32(nla); + break; case NLA_U64: - value = nla_get_attr_bo(pt, nla); + value = nla_get_u64(nla); break; case NLA_MSECS: value = nla_get_u64(nla); @@ -207,6 +188,12 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, case NLA_BINARY: value = nla_len(nla); break; + case NLA_BE16: + value = ntohs(nla_get_be16(nla)); + break; + case NLA_BE32: + value = ntohl(nla_get_be32(nla)); + break; default: return -EINVAL; } @@ -334,6 +321,8 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U64: case NLA_MSECS: case NLA_BINARY: + case NLA_BE16: + case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 088244f9d8383..4edd899aeb9bb 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -173,10 +173,10 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), - [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX_BE(NLA_U32, 255), + [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), + [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; From 7a263a0402561035199cd9049baadb908a92b6b4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 24 Oct 2022 04:10:55 +0900 Subject: [PATCH 150/239] kconfig: fix segmentation fault in menuconfig search Since commit d05377e184fc ("kconfig: Create links to main menu items in search"), menuconfig shows a jump key next to "Main menu" if the nearest visible parent is the rootmenu. If you press that jump key, menuconfig crashes with a segmentation fault. For example, do this: $ make ARCH=arm64 allnoconfig menuconfig Press '/' to search for the string "ACPI". Press '1' to choose "(1) Main menu". Then, menuconfig crashed with a segmentation fault. The following code in search_conf() conf(targets[i]->parent, targets[i]); results in NULL pointer dereference because targets[i] is the rootmenu, which does not have a parent. Commit d05377e184fc tried to fix the issue of top-level items not having a jump key, but adding the "Main menu" was not the right fix. The correct fix is to show the searched item itself. This fixes another weird behavior described in the comment block. Fixes: d05377e184fc ("kconfig: Create links to main menu items in search") Reported-by: Johannes Zink Signed-off-by: Masahiro Yamada Tested-by: Bagas Sanjaya Tested-by: Johannes Zink --- scripts/kconfig/menu.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 62b6313f51c8b..109325f31bef3 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -722,8 +722,8 @@ static void get_prompt_str(struct gstr *r, struct property *prop, if (!expr_eq(prop->menu->dep, prop->visible.expr)) get_dep_str(r, prop->visible.expr, " Visible if: "); - menu = prop->menu->parent; - for (i = 0; menu && i < 8; menu = menu->parent) { + menu = prop->menu; + for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { bool accessible = menu_is_visible(menu); submenu[i++] = menu; @@ -733,16 +733,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, if (head && location) { jump = xmalloc(sizeof(struct jump_key)); - if (menu_is_visible(prop->menu)) { - /* - * There is not enough room to put the hint at the - * beginning of the "Prompt" line. Put the hint on the - * last "Location" line even when it would belong on - * the former. - */ - jump->target = prop->menu; - } else - jump->target = location; + jump->target = location; if (list_empty(head)) jump->index = 0; @@ -758,13 +749,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, menu = submenu[i]; if (jump && menu == location) jump->offset = strlen(r->s); - - if (menu == &rootmenu) - /* The real rootmenu prompt is ugly */ - str_printf(r, "%*cMain menu", j, ' '); - else - str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu)); - + str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu)); if (menu->sym) { str_printf(r, " (%s [=%s])", menu->sym->name ? menu->sym->name : "", From 5c26159c97b324dc5174a5713eafb8c855cf8106 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 26 Oct 2022 14:32:16 +0200 Subject: [PATCH 151/239] ipvs: use explicitly signed chars The `char` type with no explicit sign is sometimes signed and sometimes unsigned. This code will break on platforms such as arm, where char is unsigned. So mark it here as explicitly signed, so that the todrop_counter decrement and subsequent comparison is correct. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 8c04bb57dd6fe..7c4866c043437 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp) * The drop rate array needs tuning for real environments. * Called from timer bh only => no locking */ - static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static char todrop_counter[9] = {0}; + static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + static signed char todrop_counter[9] = {0}; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. From 3d00c6a0da8ddcf75213e004765e4a42acc71d5d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:04 +0800 Subject: [PATCH 152/239] ipvs: fix WARNING in __ip_vs_cleanup_batch() During the initialization of ip_vs_conn_net_init(), if file ip_vs_conn or ip_vs_conn_sync fails to be created, the initialization is successful by default. Therefore, the ip_vs_conn or ip_vs_conn_sync file doesn't be found during the remove. The following is the stack information: name 'ip_vs_conn_sync' WARNING: CPU: 3 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: __ip_vs_cleanup_batch+0x7d/0x120 ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 61b1ab4583e2 ("IPVS: netns, add basic init per netns.") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7c4866c043437..13534e02346cc 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, - &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); - proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn; + + if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn_sync; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +err_conn_sync: + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); +err_conn: + return -ENOMEM; +#endif } void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ ip_vs_conn_flush(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); +#endif } int __init ip_vs_conn_init(void) From 5663ed63adb9619c98ab7479aa4606fa9b7a548c Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:05 +0800 Subject: [PATCH 153/239] ipvs: fix WARNING in ip_vs_app_net_cleanup() During the initialization of ip_vs_app_net_init(), if file ip_vs_app fails to be created, the initialization is successful by default. Therefore, the ip_vs_app file doesn't be found during the remove in ip_vs_app_net_cleanup(). It will cause WRNING. The following is the stack information: name 'ip_vs_app' WARNING: CPU: 1 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 457c4cbc5a3d ("[NET]: Make /proc/net per network namespace") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_app.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index f9b16f2b22191..fdacbc3c15bef 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, + &ip_vs_app_seq_ops, + sizeof(struct seq_net_private))) + return -ENOMEM; +#endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); +#endif } From 89c1017aac67ca81973b7c8eac5d021315811a93 Mon Sep 17 00:00:00 2001 From: Zhao Gongyi Date: Tue, 1 Nov 2022 11:56:02 +0800 Subject: [PATCH 154/239] selftests/pidfd_test: Remove the erroneous ',' Remove the erroneous ',', otherwise it might result in wrong output and report: ... Bail out! (errno %d) test: Unexpected epoll_wait result (c=4208480, events=2) ... Fixes: 740378dc7834 ("pidfd: add polling selftests") Signed-off-by: Zhao Gongyi Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index d36654265b7a5..e2dd4ed849846 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -413,7 +413,7 @@ static void poll_pidfd(const char *test_name, int pidfd) c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000); if (c != 1 || !(events[0].events & EPOLLIN)) - ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) ", + ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) " "(errno %d)\n", test_name, c, events[0].events, errno); From cbc1dd5b659f5a2c3cba88b197b7443679bb35a0 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 19:52:52 +0800 Subject: [PATCH 155/239] netfilter: nf_nat: Fix possible memory leak in nf_nat_init() In nf_nat_init(), register_nf_nat_bpf() can fail and return directly without any error handling. Then nf_nat_bysource will leak and registering of &nat_net_ops, &follow_master_nat and nf_nat_hook won't be reverted. This leaves wild ops in linkedlists and when another module tries to call register_pernet_operations() or nf_ct_helper_expectfn_register() it triggers page fault: BUG: unable to handle page fault for address: fffffbfff81b964c RIP: 0010:register_pernet_operations+0x1b9/0x5f0 Call Trace: register_pernet_subsys+0x29/0x40 ebtables_init+0x58/0x1000 [ebtables] ... Fixes: 820dc0523e05 ("net: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c") Signed-off-by: Chen Zhongjin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 18319a6e68062..e29e4ccb5c5a3 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1152,7 +1152,16 @@ static int __init nf_nat_init(void) WARN_ON(nf_nat_hook != NULL); RCU_INIT_POINTER(nf_nat_hook, &nat_hook); - return register_nf_nat_bpf(); + ret = register_nf_nat_bpf(); + if (ret < 0) { + RCU_INIT_POINTER(nf_nat_hook, NULL); + nf_ct_helper_expectfn_unregister(&follow_master_nat); + synchronize_net(); + unregister_pernet_subsys(&nat_net_ops); + kvfree(nf_nat_bysource); + } + + return ret; } static void __exit nf_nat_cleanup(void) From 4b18cb3f74dcfc183c2434e17bfce09ce6302e37 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 26 Oct 2022 22:10:40 +0800 Subject: [PATCH 156/239] perf/hw_breakpoint: test: Skip the test if dependencies unmet Running the test currently fails on non-SMP systems, despite being enabled by default. This means that running the test with: ./tools/testing/kunit/kunit.py run --arch x86_64 hw_breakpoint results in every hw_breakpoint test failing with: # test_one_cpu: failed to initialize: -22 not ok 1 - test_one_cpu Instead, use kunit_skip(), which will mark the test as skipped, and give a more comprehensible message: ok 1 - test_one_cpu # SKIP not enough cpus This makes it more obvious that the test is not suited to the test environment, and so wasn't run, rather than having run and failed. Signed-off-by: David Gow Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Latypov Acked-by: Marco Elver Link: https://lore.kernel.org/r/20221026141040.1609203-1-davidgow@google.com --- kernel/events/hw_breakpoint_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c index 5ced822df7889..c57610f52bb4d 100644 --- a/kernel/events/hw_breakpoint_test.c +++ b/kernel/events/hw_breakpoint_test.c @@ -295,11 +295,11 @@ static int test_init(struct kunit *test) { /* Most test cases want 2 distinct CPUs. */ if (num_online_cpus() < 2) - return -EINVAL; + kunit_skip(test, "not enough cpus"); /* Want the system to not use breakpoints elsewhere. */ if (hw_breakpoint_is_used()) - return -EBUSY; + kunit_skip(test, "hw breakpoint already in use"); return 0; } From 80275ca9e525c198c7efe045c4a6cdb68a2ea763 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 24 Sep 2022 13:47:37 +0800 Subject: [PATCH 157/239] perf/x86/rapl: Use standard Energy Unit for SPR Dram RAPL domain Intel Xeon servers used to use a fixed energy resolution (15.3uj) for Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard energy resolution as described in MSR_RAPL_POWER_UNIT. Remove the SPR Dram energy unit quirk. Fixes: bcfd218b6679 ("perf/x86/rapl: Add support for Intel SPR platform") Signed-off-by: Zhang Rui Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Tested-by: Wang Wendy Link: https://lkml.kernel.org/r/20220924054738.12076-3-rui.zhang@intel.com --- arch/x86/events/rapl.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index fea544e5842a1..a829492bca4c1 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -619,12 +619,8 @@ static int rapl_check_hw_unit(struct rapl_model *rm) case RAPL_UNIT_QUIRK_INTEL_HSW: rapl_hw_unit[PERF_RAPL_RAM] = 16; break; - /* - * SPR shares the same DRAM domain energy unit as HSW, plus it - * also has a fixed energy unit for Psys domain. - */ + /* SPR uses a fixed energy unit for Psys domain. */ case RAPL_UNIT_QUIRK_INTEL_SPR: - rapl_hw_unit[PERF_RAPL_RAM] = 16; rapl_hw_unit[PERF_RAPL_PSYS] = 0; break; default: From acc5568b90c19ac6375508a93b9676cd18a92a35 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:41:18 -0700 Subject: [PATCH 158/239] perf/x86/intel: Fix pebs event constraints for ICL According to the latest event list, update the MEM_INST_RETIRED events which support the DataLA facility. Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Reported-by: Jannis Klinkenberg Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154119.571386-1-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7839507b38448..41e8d6591777b 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -982,8 +982,13 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ From 0916886bb978e7eae1ca3955ba07f51c020da20c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:41:19 -0700 Subject: [PATCH 159/239] perf/x86/intel: Fix pebs event constraints for SPR According to the latest event list, update the MEM_INST_RETIRED events which support the DataLA facility for SPR. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154119.571386-2-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 41e8d6591777b..446d2833efa76 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1009,8 +1009,13 @@ struct event_constraint intel_spr_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), INTEL_PSD_CONSTRAINT(0x2cd, 0x1), - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), From 6f8faf471446844bb9c318e0340221049d5c19f4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:45:50 -0700 Subject: [PATCH 160/239] perf/x86/intel: Add Cooper Lake stepping to isolation_ucodes[] The intel_pebs_isolation quirk checks both model number and stepping. Cooper Lake has a different stepping (11) than the other Skylake Xeon. It cannot benefit from the optimization in commit 9b545c04abd4f ("perf/x86/kvm: Avoid unnecessary work in guest filtering"). Add the stepping of Cooper Lake into the isolation_ucodes[] table. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154550.571663-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a646a5f9a235c..1b92bf05fd652 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4911,6 +4911,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), From e97c089d7a49f67027395ddf70bf327eeac2611e Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 29 Oct 2022 00:10:49 +0800 Subject: [PATCH 161/239] rose: Fix NULL pointer dereference in rose_send_frame() The syzkaller reported an issue: KASAN: null-ptr-deref in range [0x0000000000000380-0x0000000000000387] CPU: 0 PID: 4069 Comm: kworker/0:15 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: rcu_gp srcu_invoke_callbacks RIP: 0010:rose_send_frame+0x1dd/0x2f0 net/rose/rose_link.c:101 Call Trace: rose_transmit_clear_request+0x1d5/0x290 net/rose/rose_link.c:255 rose_rx_call_request+0x4c0/0x1bc0 net/rose/af_rose.c:1009 rose_loopback_timer+0x19e/0x590 net/rose/rose_loopback.c:111 call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790 __run_timers kernel/time/timer.c:1768 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803 __do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 [...] It triggers NULL pointer dereference when 'neigh->dev->dev_addr' is called in the rose_send_frame(). It's the first occurrence of the `neigh` is in rose_loopback_timer() as `rose_loopback_neigh', and the 'dev' in 'rose_loopback_neigh' is initialized sa nullptr. It had been fixed by commit 3b3fd068c56e3fbea30090859216a368398e39bf ("rose: Fix Null pointer dereference in rose_send_frame()") ever. But it's introduced by commit 3c53cd65dece47dd1f9d3a809f32e59d1d87b2b8 ("rose: check NULL rose_loopback_neigh->loopback") again. We fix it by add NULL check in rose_transmit_clear_request(). When the 'dev' in 'neigh' is NULL, we don't reply the request and just clear it. syzkaller don't provide repro, and I provide a syz repro like: r0 = syz_init_net_socket$bt_sco(0x1f, 0x5, 0x2) ioctl$sock_inet_SIOCSIFFLAGS(r0, 0x8914, &(0x7f0000000180)={'rose0\x00', 0x201}) r1 = syz_init_net_socket$rose(0xb, 0x5, 0x0) bind$rose(r1, &(0x7f00000000c0)=@full={0xb, @dev, @null, 0x0, [@null, @null, @netrom, @netrom, @default, @null]}, 0x40) connect$rose(r1, &(0x7f0000000240)=@short={0xb, @dev={0xbb, 0xbb, 0xbb, 0x1, 0x0}, @remote={0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1}, 0x1, @netrom={0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0x0, 0x0}}, 0x1c) Fixes: 3c53cd65dece ("rose: check NULL rose_loopback_neigh->loopback") Signed-off-by: Zhang Qilong Signed-off-by: David S. Miller --- net/rose/rose_link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 8b96a56d3a49b..0f77ae8ef944a 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns unsigned char *dptr; int len; + if (!neigh->dev) + return; + len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) From e7d1d4d9ac0dfa40be4c2c8abd0731659869b297 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:40 +0800 Subject: [PATCH 162/239] mISDN: fix possible memory leak in mISDN_register_device() Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically, add put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. Set device class before put_device() to avoid null release() function WARN message in device_release(). Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index a41b4b2645941..7ea0100f218a0 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -233,11 +233,12 @@ mISDN_register_device(struct mISDNdevice *dev, if (debug & DEBUG_CORE) printk(KERN_DEBUG "mISDN_register %s %d\n", dev_name(&dev->dev), dev->id); + dev->dev.class = &mISDN_class; + err = create_stack(dev); if (err) goto error1; - dev->dev.class = &mISDN_class; dev->dev.platform_data = dev; dev->dev.parent = parent; dev_set_drvdata(&dev->dev, dev); @@ -249,8 +250,8 @@ mISDN_register_device(struct mISDNdevice *dev, error3: delete_stack(dev); - return err; error1: + put_device(&dev->dev); return err; } From bf00f5426074249058a106a6edbb89e4b25a4d79 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:41 +0800 Subject: [PATCH 163/239] isdn: mISDN: netjet: fix wrong check of device registration The class is set in mISDN_register_device(), but if device_add() returns error, it will lead to delete a device without added, fix this by using device_is_registered() to check if the device is registered. Fixes: a900845e5661 ("mISDN: Add support for Traverse Technologies NETJet PCI cards") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/netjet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index a52f275f82634..f8447135a9022 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -956,7 +956,7 @@ nj_release(struct tiger_hw *card) } if (card->irq > 0) free_irq(card->irq, card); - if (card->isac.dch.dev.dev.class) + if (device_is_registered(&card->isac.dch.dev.dev)) mISDN_unregister_device(&card->isac.dch.dev); for (i = 0; i < 2; i++) { From 5614dc3a47e3310fbc77ea3b67eaadd1c6417bf1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:37 +0000 Subject: [PATCH 164/239] btrfs: fix inode list leak during backref walking at resolve_indirect_refs() During backref walking, at resolve_indirect_refs(), if we get an error we jump to the 'out' label and call ulist_free() on the 'parents' ulist, which frees all the elements in the ulist - however that does not free any inode lists that may be attached to elements, through the 'aux' field of a ulist node, so we end up leaking lists if we have any attached to the unodes. Fix this by calling free_leaf_list() instead of ulist_free() when we exit from resolve_indirect_refs(). The static function free_leaf_list() is moved up for this to be possible and it's slightly simplified by removing unnecessary code. Fixes: 3301958b7c1d ("Btrfs: add inodes before dropping the extent lock in find_all_leafs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4ec18ceb2f21d..40afae0af4e67 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -648,6 +648,18 @@ unode_aux_to_inode_list(struct ulist_node *node) return (struct extent_inode_elem *)(uintptr_t)node->aux; } +static void free_leaf_list(struct ulist *ulist) +{ + struct ulist_node *node; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(ulist, &uiter))) + free_inode_elem_list(unode_aux_to_inode_list(node)); + + ulist_free(ulist); +} + /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not @@ -762,7 +774,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, cond_resched(); } out: - ulist_free(parents); + /* + * We may have inode lists attached to refs in the parents ulist, so we + * must free them before freeing the ulist and its refs. + */ + free_leaf_list(parents); return ret; } @@ -1409,24 +1425,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = unode_aux_to_inode_list(node); - free_inode_elem_list(eie); - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. key_list_head will point to a list of corresponding keys (caller must From 92876eec382a0f19f33d09d2c939e9ca49038ae5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:38 +0000 Subject: [PATCH 165/239] btrfs: fix inode list leak during backref walking at find_parent_nodes() During backref walking, at find_parent_nodes(), if we are dealing with a data extent and we get an error while resolving the indirect backrefs, at resolve_indirect_refs(), or in the while loop that iterates over the refs in the direct refs rbtree, we end up leaking the inode lists attached to the direct refs we have in the direct refs rbtree that were not yet added to the refs ulist passed as argument to find_parent_nodes(). Since they were not yet added to the refs ulist and prelim_release() does not free the lists, on error the caller can only free the lists attached to the refs that were added to the refs ulist, all the remaining refs get their inode lists never freed, therefore leaking their memory. Fix this by having prelim_release() always free any attached inode list to each ref found in the rbtree, and have find_parent_nodes() set the ref's inode list to NULL once it transfers ownership of the inode list to a ref added to the refs ulist passed to find_parent_nodes(). Fixes: 86d5f9944252 ("btrfs: convert prelimary reference tracking to use rbtrees") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 40afae0af4e67..18374a6d05bdf 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -289,8 +289,10 @@ static void prelim_release(struct preftree *preftree) struct prelim_ref *ref, *next_ref; rbtree_postorder_for_each_entry_safe(ref, next_ref, - &preftree->root.rb_root, rbnode) + &preftree->root.rb_root, rbnode) { + free_inode_elem_list(ref->inode_list); free_pref(ref); + } preftree->root = RB_ROOT_CACHED; preftree->count = 0; @@ -1384,6 +1386,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, if (ret < 0) goto out; ref->inode_list = eie; + /* + * We transferred the list ownership to the ref, + * so set to NULL to avoid a double free in case + * an error happens after this. + */ + eie = NULL; } ret = ulist_add_merge_ptr(refs, ref->parent, ref->inode_list, @@ -1409,6 +1417,14 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, eie->next = ref->inode_list; } eie = NULL; + /* + * We have transferred the inode list ownership from + * this ref to the ref we added to the 'refs' ulist. + * So set this ref's inode list to NULL to avoid + * use-after-free when our caller uses it or double + * frees in case an error happens before we return. + */ + ref->inode_list = NULL; } cond_resched(); } From d37de92b38932d40e4a251e876cc388f9aee5f42 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:39 +0000 Subject: [PATCH 166/239] btrfs: fix ulist leaks in error paths of qgroup self tests In the test_no_shared_qgroup() and test_multiple_refs() qgroup self tests, if we fail to add the tree ref, remove the extent item or remove the extent ref, we are returning from the test function without freeing the "old_roots" ulist that was allocated by the previous calls to btrfs_find_all_roots(). Fix that by calling ulist_free() before returning. Fixes: 442244c96332 ("btrfs: qgroup: Switch self test to extent-oriented qgroup mechanism.") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tests/qgroup-tests.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index eee1e44595410..843dd3d3adbe7 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -232,8 +232,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -266,8 +268,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, } ret = remove_extent_item(root, nodesize, nodesize); - if (ret) + if (ret) { + ulist_free(old_roots); return -EINVAL; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -329,8 +333,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -362,8 +368,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = add_tree_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -401,8 +409,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = remove_extent_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { From d0ea17aec12ea0f7b9d2ed727d8ef8169d1e7699 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:40 +0000 Subject: [PATCH 167/239] btrfs: remove pointless and double ulist frees in error paths of qgroup tests Several places in the qgroup self tests follow the pattern of freeing the ulist pointer they passed to btrfs_find_all_roots() if the call to that function returned an error. That is pointless because that function always frees the ulist in case it returns an error. Also In some places like at test_multiple_refs(), after a call to btrfs_qgroup_account_extent() we also leave "old_roots" and "new_roots" pointing to ulists that were freed, because btrfs_qgroup_account_extent() has freed those ulists, and if after that the next call to btrfs_find_all_roots() fails, we call ulist_free() on the "old_roots" ulist again, resulting in a double free. So remove those calls to reduce the code size and avoid double ulist free in case of an error. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tests/qgroup-tests.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 843dd3d3adbe7..63676ea19f29e 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -225,7 +225,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, */ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -240,7 +239,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -252,17 +250,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } + /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */ + old_roots = NULL; + new_roots = NULL; + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, nodesize, nodesize)) { test_err("qgroup counts didn't match expected values"); return -EINVAL; } - old_roots = NULL; - new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -276,7 +275,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -326,7 +324,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -341,7 +338,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -361,7 +357,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -376,7 +371,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -402,7 +396,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -417,7 +410,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } From a348c8d4f6cf23ef04b0edaccdfe9d94c2d335db Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Nov 2022 12:46:35 +0000 Subject: [PATCH 168/239] btrfs: fix nowait buffered write returning -ENOSPC If we are doing a buffered write in NOWAIT context and we can't reserve metadata space due to -ENOSPC, then we should return -EAGAIN so that we retry the write in a context allowed to block and do metadata reservation with flushing, which might succeed this time due to the allowed flushing. Returning -ENOSPC while in NOWAIT context simply makes some writes fail with -ENOSPC when they would likely succeed after switching from NOWAIT context to blocking context. That is unexpected behaviour and even fio complains about it with a warning like this: fio: io_u error on file /mnt/sdi/task_0.0.0: No space left on device: write offset=1535705088, buflen=65536 fio: pid=592630, err=28/file:io_u.c:1846, func=io_u error, error=No space left on device The fio's job config is this: [global] bs=64K ioengine=io_uring iodepth=1 size=2236962133 nr_files=1 filesize=2236962133 direct=0 runtime=10 fallocate=posix io_size=2236962133 group_reporting time_based [task_0] rw=randwrite directory=/mnt/sdi numjobs=4 So fix this by returning -EAGAIN if we are in NOWAIT context and the metadata reservation failed with -ENOSPC. Fixes: 304e45acdb8f ("btrfs: plumb NOWAIT through the write path") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d55ad46384d17..5ce2ae9d4f723 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1598,6 +1598,9 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, write_bytes); else btrfs_check_nocow_unlock(BTRFS_I(inode)); + + if (nowait && ret == -ENOSPC) + ret = -EAGAIN; break; } From eb81b682b131642405a05c627ab08cf0967b3dd8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Nov 2022 12:46:36 +0000 Subject: [PATCH 169/239] btrfs: fix inode reserve space leak due to nowait buffered write During a nowait buffered write, if we fail to balance dirty pages we exit btrfs_buffered_write() without releasing the delalloc space reserved for an extent, resulting in leaking space from the inode's block reserve. So fix that by releasing the delalloc space for the extent when balancing dirty pages fails. Reported-by: kernel test robot Link: https://lore.kernel.org/all/202210111304.d369bc32-yujie.liu@intel.com Fixes: 965f47aeb5de ("btrfs: make btrfs_buffered_write nowait compatible") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ce2ae9d4f723..d01631d478067 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1607,8 +1607,10 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, release_bytes = reserve_bytes; again: ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags); - if (ret) + if (ret) { + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); break; + } /* * This is going to setup the pages array with the number of From 510841da1fcc16f702440ab58ef0b4d82a9056b7 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 2 Nov 2022 10:40:47 +0100 Subject: [PATCH 170/239] netfilter: ipset: enforce documented limit to prevent allocating huge memory Daniel Xu reported that the hash:net,iface type of the ipset subsystem does not limit adding the same network with different interfaces to a set, which can lead to huge memory usage or allocation failure. The quick reproducer is $ ipset create ACL.IN.ALL_PERMIT hash:net,iface hashsize 1048576 timeout 0 $ for i in $(seq 0 100); do /sbin/ipset add ACL.IN.ALL_PERMIT 0.0.0.0/0,kaf_$i timeout 0 -exist; done The backtrace when vmalloc fails: [Tue Oct 25 00:13:08 2022] ipset: vmalloc error: size 1073741848, exceeds total pages <...> [Tue Oct 25 00:13:08 2022] Call Trace: [Tue Oct 25 00:13:08 2022] [Tue Oct 25 00:13:08 2022] dump_stack_lvl+0x48/0x60 [Tue Oct 25 00:13:08 2022] warn_alloc+0x155/0x180 [Tue Oct 25 00:13:08 2022] __vmalloc_node_range+0x72a/0x760 [Tue Oct 25 00:13:08 2022] ? hash_netiface4_add+0x7c0/0xb20 [Tue Oct 25 00:13:08 2022] ? __kmalloc_large_node+0x4a/0x90 [Tue Oct 25 00:13:08 2022] kvmalloc_node+0xa6/0xd0 [Tue Oct 25 00:13:08 2022] ? hash_netiface4_resize+0x99/0x710 <...> The fix is to enforce the limit documented in the ipset(8) manpage: > The internal restriction of the hash:net,iface set type is that the same > network prefix cannot be stored with more than 64 different interfaces > in a single set. Fixes: ccf0a4b7fc68 ("netfilter: ipset: Add bucketsize parameter to all hash types") Reported-by: Daniel Xu Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 30 ++++++--------------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6e391308431da..3adc291d9ce18 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -42,31 +42,8 @@ #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 - #define AHASH_MAX(h) ((h)->bucketsize) -/* Max number of elements can be tuned */ -#ifdef IP_SET_HASH_WITH_MULTI -static u8 -tune_bucketsize(u8 curr, u32 multi) -{ - u32 n; - - if (multi < curr) - return curr; - - n = curr + AHASH_INIT_SIZE; - /* Currently, at listing one hash bucket must fit into a message. - * Therefore we have a hard limit here. - */ - return n > curr && n <= AHASH_MAX_TUNED ? n : curr; -} -#define TUNE_BUCKETSIZE(h, multi) \ - ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi)) -#else -#define TUNE_BUCKETSIZE(h, multi) -#endif - /* A hash bucket */ struct hbucket { struct rcu_head rcu; /* for call_rcu */ @@ -936,7 +913,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto set_full; /* Create a new slot */ if (n->pos >= n->size) { - TUNE_BUCKETSIZE(h, multi); +#ifdef IP_SET_HASH_WITH_MULTI + if (h->bucketsize >= AHASH_MAX_TUNED) + goto set_full; + else if (h->bucketsize < multi) + h->bucketsize += AHASH_INIT_SIZE; +#endif if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); From 8e987f1f4da92d9f1dd020418bfab9fe04b1c54c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 2 Nov 2022 21:45:59 +0800 Subject: [PATCH 171/239] Documentation: devres: add missing I2C helper Add missing devm_i2c_add_adapter() to devres.rst. It's introduced by commit 07740c92ae57 ("i2c: core: add managed function for adding i2c adapters"). Fixes: 07740c92ae57 ("i2c: core: add managed function for adding i2c adapters") Signed-off-by: Yang Yingliang Acked-by: Yicong Yang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- Documentation/driver-api/driver-model/devres.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048ec..56082265e8e50 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -279,6 +279,7 @@ GPIO devm_gpio_request_one() I2C + devm_i2c_add_adapter() devm_i2c_new_dummy_device() IIO From 341421084d705475817f7f0d68e130370d10b20d Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 20 Oct 2022 11:46:40 -0400 Subject: [PATCH 172/239] drm/amd/display: Update DSC capabilitie for DCN314 dcn314 has 4 DSC - conflicted hardware document updated and confirmed. Tested-by: Mark Broadworth Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Leo Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index d0ad72caead28..9066c511a0529 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -847,7 +847,7 @@ static const struct resource_caps res_cap_dcn314 = { .num_ddc = 5, .num_vmid = 16, .num_mpc_3dlut = 2, - .num_dsc = 3, + .num_dsc = 4, }; static const struct dc_plane_cap plane_cap = { From 14aed119942f6c2f1286022323139f7404db5d2b Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 20 Oct 2022 11:46:41 -0400 Subject: [PATCH 173/239] drm/amd/display: Ignore Cable ID Feature Ignore cable ID for DP2 receivers that does not support the feature. Tested-by: Mark Broadworth Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c053cb79cd063..589bee9acf162 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1549,6 +1549,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; + /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ + adev->dm.dc->debug.ignore_cable_id = true; + r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); From e59843c4cdd68a369591630088171eeacce9859f Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 20 Oct 2022 11:46:44 -0400 Subject: [PATCH 174/239] drm/amd/display: Limit dcn32 to 1950Mhz display clock [why] Hardware team recommends we limit dispclock to 1950Mhz for all DCN3.2.x [how] Limit to 1950 when initializing clocks. Tested-by: Mark Broadworth Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Jun Lei Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 1c612ccf1944a..fd0313468fdbc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -157,6 +157,7 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); unsigned int num_levels; struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk; + unsigned int i; memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks)); clk_mgr_base->clks.p_state_change_support = true; @@ -205,18 +206,17 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) clk_mgr->dpm_present = true; if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) { - unsigned int i; - for (i = 0; i < num_levels; i++) if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz)) clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz); } + for (i = 0; i < num_levels; i++) + if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz > 1950) + clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = 1950; if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) { - unsigned int i; - for (i = 0; i < num_levels; i++) if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz)) From c580d758ba1b79de9ea7a475d95a6278736ae462 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 20 Oct 2022 11:46:47 -0400 Subject: [PATCH 175/239] drm/amd/display: Update latencies on DCN321 Update DF related latencies based on new measurements. Tested-by: Mark Broadworth Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index dd90f241e9065..7352f75144101 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -119,15 +119,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { }, }, .num_states = 1, - .sr_exit_time_us = 12.36, - .sr_enter_plus_exit_time_us = 16.72, + .sr_exit_time_us = 19.95, + .sr_enter_plus_exit_time_us = 24.36, .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, + .urgent_latency_pixel_data_only_us = 9.35, + .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, + .urgent_latency_vm_data_only_us = 9.35, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, From 6cb5cec16c380be4cf9776a8c23b72e9fe742fd1 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 20 Oct 2022 11:46:48 -0400 Subject: [PATCH 176/239] drm/amd/display: Set memclk levels to be at least 1 for dcn32 [Why] Cannot report 0 memclk levels even when SMU does not provide any. [How] When memclk levels reported by SMU is 0, set levels to 1. Tested-by: Mark Broadworth Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index fd0313468fdbc..6f77d8e538ab1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -669,6 +669,9 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz, &num_entries_per_clk->num_memclk_levels); + /* memclk must have at least one level */ + num_entries_per_clk->num_memclk_levels = num_entries_per_clk->num_memclk_levels ? num_entries_per_clk->num_memclk_levels : 1; + dcn32_init_single_clock(clk_mgr, PPCLK_FCLK, &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, &num_entries_per_clk->num_fclk_levels); From c3d3f35b725bf9c93bec6d3c056f6bb7cfd27403 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 20 Oct 2022 11:46:51 -0400 Subject: [PATCH 177/239] drm/amd/display: Enable timing sync on DCN32 Missed enabling timing sync on DCN32 because DCN32 has a different DML param. Tested-by: Mark Broadworth Reviewed-by: Martin Leung Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index d680f1c5b69f8..45db40c41882c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1228,6 +1228,7 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.dcc = false; pipes[pipe_cnt].pipe.src.dcc_rate = 1; pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; + pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank; pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch; pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start - timing->h_addressable From 9cb0dc6ccb7df9abe1407574ed4ad84895822d11 Mon Sep 17 00:00:00 2001 From: Max Tseng Date: Thu, 20 Oct 2022 11:46:52 -0400 Subject: [PATCH 178/239] drm/amd/display: cursor update command incomplete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing send cursor_rect width & Height into DMUB. PSR-SU would use these information. But missing these assignment in last refactor commit Reported-by: Timur Kristóf Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2227 Fixes: b73353f7f3d4 ("drm/amd/display: Use the same cursor info across features") Tested-by: Mark Broadworth Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Max Tseng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 4996d2810edb8..938dba5249d48 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -623,6 +623,10 @@ void hubp2_cursor_set_attributes( hubp->att.size.bits.width = attr->width; hubp->att.size.bits.height = attr->height; hubp->att.cur_ctl.bits.mode = attr->color_format; + + hubp->cur_rect.w = attr->width; + hubp->cur_rect.h = attr->height; + hubp->att.cur_ctl.bits.pitch = hw_pitch; hubp->att.cur_ctl.bits.line_per_chunk = lpc; hubp->att.cur_ctl.bits.cur_2x_magnify = attr->attribute_flags.bits.ENABLE_MAGNIFICATION; From 89b3554782e6b65894f0551e9e0a82ad02dac94d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Mon, 24 Oct 2022 12:47:47 +0800 Subject: [PATCH 179/239] drm/amdgpu: set fb_modifiers_not_supported in vkms This patch to fix the gdm3 start failure with virual display: /usr/libexec/gdm-x-session[1711]: (II) AMDGPU(0): Setting screen physical size to 270 x 203 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): Failed to make import prime FD as pixmap: 22 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): failed to set mode: Invalid argument /usr/libexec/gdm-x-session[1711]: (WW) AMDGPU(0): Failed to set mode on CRTC 0 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): Failed to enable any CRTC gnome-shell[1840]: Running GNOME Shell (using mutter 42.2) as a X11 window and compositing manager /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): failed to set mode: Invalid argument vkms doesn't have modifiers support, set fb_modifiers_not_supported to bring the gdm back. Signed-off-by: Yifan Zhang Acked-by: Guchun Chen Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index f4b5301ea2a02..500a1dc4fe029 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -500,6 +500,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + r = amdgpu_display_modeset_create_props(adev); if (r) return r; From e542ca6e3e554bad53b2ea5741873b67f4585ea9 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Tue, 25 Oct 2022 14:42:13 -0400 Subject: [PATCH 180/239] drm/amdgpu: correct MES debugfs versions Use mes.sched_version, mes.kiq_version for debugfs as mes.ucode_fw_version does not contain correct versioning information. Signed-off-by: Graham Sider Reviewed-by: Jack Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bf1ff8f0e7120..4e42dcb1950f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -337,12 +337,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->psp.cap_feature_version; break; case AMDGPU_INFO_FW_MES_KIQ: - fw_info->ver = adev->mes.ucode_fw_version[0]; - fw_info->feature = 0; + fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; break; case AMDGPU_INFO_FW_MES: - fw_info->ver = adev->mes.ucode_fw_version[1]; - fw_info->feature = 0; + fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; break; case AMDGPU_INFO_FW_IMU: fw_info->ver = adev->gfx.imu_fw_version; From 5b994354af3cab770bf13386469c5725713679af Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 26 Oct 2022 10:00:54 +0800 Subject: [PATCH 181/239] drm/amdkfd: Fix NULL pointer dereference in svm_migrate_to_ram() ./drivers/gpu/drm/amd/amdkfd/kfd_migrate.c:985:58-62: ERROR: p is NULL but dereferenced. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2549 Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2797029bd5001..22b077ac9a196 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -973,12 +973,10 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) out_unlock_svms: mutex_unlock(&p->svms.lock); out_unref_process: + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); kfd_unref_process(p); out_mmput: mmput(mm); - - pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); - return r ? VM_FAULT_SIGBUS : 0; } From 8d4de331f1b24a22d18e3c6116aa25228cf54854 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Oct 2022 14:03:55 -0500 Subject: [PATCH 182/239] drm/amd: Fail the suspend if resources can't be evicted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a system does not have swap and memory is under 100% usage, amdgpu will fail to evict resources. Currently the suspend carries on proceeding to reset the GPU: ``` [drm] evicting device resources failed [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -12 [drm] free PSP TMR buffer [TTM] Failed allocating page table [drm] evicting device resources failed amdgpu 0000:03:00.0: amdgpu: MODE1 reset amdgpu 0000:03:00.0: amdgpu: GPU mode1 reset amdgpu 0000:03:00.0: amdgpu: GPU smu mode1 reset ``` At this point if the suspend actually succeeded I think that amdgpu would have recovered because the GPU would have power cut off and restored. However the kernel fails to continue the suspend from the memory pressure and amdgpu fails to run the "resume" from the aborted suspend. ``` ACPI: PM: Preparing to enter system sleep state S3 SLUB: Unable to allocate memory on node -1, gfp=0xdc0(GFP_KERNEL|__GFP_ZERO) cache: Acpi-State, object size: 80, buffer size: 80, default order: 0, min order: 0 node 0: slabs: 22, objs: 1122, free: 0 ACPI Error: AE_NO_MEMORY, Could not update object reference count (20210730/utdelete-651) [drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed! [drm:psp_resume [amdgpu]] *ERROR* PSP resume failed [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block failed -62 amdgpu 0000:03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62). PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62 amdgpu 0000:03:00.0: PM: failed to resume async: error -62 ``` To avoid this series of unfortunate events, fail amdgpu's suspend when the memory eviction fails. This will let the system gracefully recover and the user can try suspend again when the memory pressure is relieved. Reported-by: post@davidak.de Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223 Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ddaecb2610c94..64510898eedd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) * at suspend time. * */ -static void amdgpu_device_evict_resources(struct amdgpu_device *adev) +static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { + int ret; + /* No need to evict vram on APUs for suspend to ram or s2idle */ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) - return; + return 0; - if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) + ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); + if (ret) DRM_WARN("evicting device resources failed\n"); - + return ret; } /* @@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - amdgpu_device_evict_resources(adev); + r = amdgpu_device_evict_resources(adev); + if (r) + return r; amdgpu_fence_driver_hw_fini(adev); From 3aff8aaca4e36dc8b17eaa011684881a80238966 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 5 Oct 2022 00:27:18 +0300 Subject: [PATCH 183/239] Bluetooth: L2CAP: Fix use-after-free caused by l2cap_reassemble_sdu Fix the race condition between the following two flows that run in parallel: 1. l2cap_reassemble_sdu -> chan->ops->recv (l2cap_sock_recv_cb) -> __sock_queue_rcv_skb. 2. bt_sock_recvmsg -> skb_recv_datagram, skb_free_datagram. An SKB can be queued by the first flow and immediately dequeued and freed by the second flow, therefore the callers of l2cap_reassemble_sdu can't use the SKB after that function returns. However, some places continue accessing struct l2cap_ctrl that resides in the SKB's CB for a short time after l2cap_reassemble_sdu returns, leading to a use-after-free condition (the stack trace is below, line numbers for kernel 5.19.8). Fix it by keeping a local copy of struct l2cap_ctrl. BUG: KASAN: use-after-free in l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth Read of size 1 at addr ffff88812025f2f0 by task kworker/u17:3/43169 Workqueue: hci0 hci_rx_work [bluetooth] Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) print_report.cold (mm/kasan/report.c:314 mm/kasan/report.c:429) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:493) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx (net/bluetooth/l2cap_core.c:7236 net/bluetooth/l2cap_core.c:7271) bluetooth ret_from_fork (arch/x86/entry/entry_64.S:306) Allocated by task 43169: kasan_save_stack (mm/kasan/common.c:39) __kasan_slab_alloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) kmem_cache_alloc_node (mm/slab.h:750 mm/slub.c:3243 mm/slub.c:3293) __alloc_skb (net/core/skbuff.c:414) l2cap_recv_frag (./include/net/bluetooth/bluetooth.h:425 net/bluetooth/l2cap_core.c:8329) bluetooth l2cap_recv_acldata (net/bluetooth/l2cap_core.c:8442) bluetooth hci_rx_work (net/bluetooth/hci_core.c:3642 net/bluetooth/hci_core.c:3832) bluetooth process_one_work (kernel/workqueue.c:2289) worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2437) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:306) Freed by task 27920: kasan_save_stack (mm/kasan/common.c:39) kasan_set_track (mm/kasan/common.c:45) kasan_set_free_info (mm/kasan/generic.c:372) ____kasan_slab_free (mm/kasan/common.c:368 mm/kasan/common.c:328) slab_free_freelist_hook (mm/slub.c:1780) kmem_cache_free (mm/slub.c:3536 mm/slub.c:3553) skb_free_datagram (./include/net/sock.h:1578 ./include/net/sock.h:1639 net/core/datagram.c:323) bt_sock_recvmsg (net/bluetooth/af_bluetooth.c:295) bluetooth l2cap_sock_recvmsg (net/bluetooth/l2cap_sock.c:1212) bluetooth sock_read_iter (net/socket.c:1087) new_sync_read (./include/linux/fs.h:2052 fs/read_write.c:401) vfs_read (fs/read_write.c:482) ksys_read (fs/read_write.c:620) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) Link: https://lore.kernel.org/linux-bluetooth/CAKErNvoqga1WcmoR3-0875esY6TVWFQDandbVZncSiuGPBQXLA@mail.gmail.com/T/#u Fixes: d2a7ac5d5d3a ("Bluetooth: Add the ERTM receive state machine") Fixes: 4b51dae96731 ("Bluetooth: Add streaming mode receive and incoming packet classifier") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 48 ++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1f34b82ca0ec9..2283871d3f013 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6885,6 +6885,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { + struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; @@ -6909,15 +6910,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; + /* l2cap_reassemble_sdu may free skb, hence invalidate + * control, so make a copy in advance to use it after + * l2cap_reassemble_sdu returns and to avoid the race + * condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but + * it was freed by skb_free_datagram. + */ + local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; - if (control->final) { + if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { - control->final = 0; - l2cap_retransmit_all(chan, control); + local_control.final = 0; + l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } @@ -7297,11 +7315,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { + /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store + * the txseq field in advance to use it after l2cap_reassemble_sdu + * returns and to avoid the race condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but it was freed by + * skb_free_datagram. + */ + u16 txseq = control->txseq; + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); - if (l2cap_classify_txseq(chan, control->txseq) == - L2CAP_TXSEQ_EXPECTED) { + if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %u->%u", chan->buffer_seq, @@ -7324,8 +7358,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, } } - chan->last_acked_seq = control->txseq; - chan->expected_tx_seq = __next_seq(chan, control->txseq); + chan->last_acked_seq = txseq; + chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } From b36a234dc438cb6b76fc929a8df9a0e59c8acf23 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Tue, 11 Oct 2022 22:25:33 +0300 Subject: [PATCH 184/239] Bluetooth: hci_conn: Fix CIS connection dst_type handling hci_connect_cis and iso_connect_cis call hci_bind_cis inconsistently with dst_type being either ISO socket address type or the HCI type, but these values cannot be mixed like this. Fix this by using only the HCI type. CIS connection dst_type was also not initialized in hci_bind_cis, even though it is used in hci_conn_hash_lookup_cis to find existing connections. Set the value in hci_bind_cis, so that existing CIS connections are found e.g. when doing deferred socket connections, also when dst_type is not 0 (ADDR_LE_DEV_PUBLIC). Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 7 +------ net/bluetooth/iso.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a59c44870503..1176bad5d8333 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1761,6 +1761,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, if (!cis) return ERR_PTR(-ENOMEM); cis->cleanup = cis_cleanup; + cis->dst_type = dst_type; } if (cis->state == BT_CONNECTED) @@ -2140,12 +2141,6 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *le; struct hci_conn *cis; - /* Convert from ISO socket address type to HCI address type */ - if (dst_type == BDADDR_LE_PUBLIC) - dst_type = ADDR_LE_DEV_PUBLIC; - else - dst_type = ADDR_LE_DEV_RANDOM; - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 613039ba5dbf5..f825857db6d0b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -235,6 +235,14 @@ static int iso_chan_add(struct iso_conn *conn, struct sock *sk, return err; } +static inline u8 le_addr_type(u8 bdaddr_type) +{ + if (bdaddr_type == BDADDR_LE_PUBLIC) + return ADDR_LE_DEV_PUBLIC; + else + return ADDR_LE_DEV_RANDOM; +} + static int iso_connect_bis(struct sock *sk) { struct iso_conn *conn; @@ -328,14 +336,16 @@ static int iso_connect_cis(struct sock *sk) /* Just bind if DEFER_SETUP has been set */ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hcon = hci_bind_cis(hdev, &iso_pi(sk)->dst, - iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + le_addr_type(iso_pi(sk)->dst_type), + &iso_pi(sk)->qos); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; } } else { hcon = hci_connect_cis(hdev, &iso_pi(sk)->dst, - iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + le_addr_type(iso_pi(sk)->dst_type), + &iso_pi(sk)->qos); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; From 160fbcf3bfb93c3c086427f9f4c8bc70f217e9be Mon Sep 17 00:00:00 2001 From: Soenke Huster Date: Wed, 12 Oct 2022 09:45:06 +0200 Subject: [PATCH 185/239] Bluetooth: virtio_bt: Use skb_put to set length By using skb_put we ensure that skb->tail is set correctly. Currently, skb->tail is always zero, which leads to errors, such as the following page fault in rfcomm_recv_frame: BUG: unable to handle page fault for address: ffffed1021de29ff #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:rfcomm_run+0x831/0x4040 (net/bluetooth/rfcomm/core.c:1751) Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver") Signed-off-by: Soenke Huster Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/virtio_bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 67c21263f9e0f..fd281d4395055 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -219,7 +219,7 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb->len = len; + skb_put(skb, len); virtbt_rx_handle(vbt, skb); if (virtbt_add_inbuf(vbt) < 0) From 0d0e2d032811280b927650ff3c15fe5020e82533 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 17 Oct 2022 15:58:13 +0800 Subject: [PATCH 186/239] Bluetooth: L2CAP: fix use-after-free in l2cap_conn_del() When l2cap_recv_frame() is invoked to receive data, and the cid is L2CAP_CID_A2MP, if the channel does not exist, it will create a channel. However, after a channel is created, the hold operation of the channel is not performed. In this case, the value of channel reference counting is 1. As a result, after hci_error_reset() is triggered, l2cap_conn_del() invokes the close hook function of A2MP to release the channel. Then l2cap_chan_unlock(chan) will trigger UAF issue. The process is as follows: Receive data: l2cap_data_channel() a2mp_channel_create() --->channel ref is 2 l2cap_chan_put() --->channel ref is 1 Triger event: hci_error_reset() hci_dev_do_close() ... l2cap_disconn_cfm() l2cap_conn_del() l2cap_chan_hold() --->channel ref is 2 l2cap_chan_del() --->channel ref is 1 a2mp_chan_close_cb() --->channel ref is 0, release channel l2cap_chan_unlock() --->UAF of channel The detailed Call Trace is as follows: BUG: KASAN: use-after-free in __mutex_unlock_slowpath+0xa6/0x5e0 Read of size 8 at addr ffff8880160664b8 by task kworker/u11:1/7593 Workqueue: hci0 hci_error_reset Call Trace: dump_stack_lvl+0xcd/0x134 print_report.cold+0x2ba/0x719 kasan_report+0xb1/0x1e0 kasan_check_range+0x140/0x190 __mutex_unlock_slowpath+0xa6/0x5e0 l2cap_conn_del+0x404/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Allocated by task 7593: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0xa9/0xd0 l2cap_chan_create+0x40/0x930 amp_mgr_create+0x96/0x990 a2mp_channel_create+0x7d/0x150 l2cap_recv_frame+0x51b8/0x9a70 l2cap_recv_acldata+0xaa3/0xc00 hci_rx_work+0x702/0x1220 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Freed by task 7593: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x167/0x1c0 slab_free_freelist_hook+0x89/0x1c0 kfree+0xe2/0x580 l2cap_chan_put+0x22a/0x2d0 l2cap_conn_del+0x3fc/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: d0be8347c623 ("Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put") Signed-off-by: Zhengchao Shao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2283871d3f013..9a32ce6349194 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7615,6 +7615,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, return; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); } else { BT_DBG("unknown cid 0x%4.4x", cid); From 7c9524d929648935bac2bbb4c20437df8f9c3f42 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 18 Oct 2022 10:18:51 +0800 Subject: [PATCH 187/239] Bluetooth: L2CAP: Fix memory leak in vhci_write Syzkaller reports a memory leak as follows: ==================================== BUG: memory leak unreferenced object 0xffff88810d81ac00 (size 240): [...] hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __alloc_skb+0x1f9/0x270 net/core/skbuff.c:418 [] alloc_skb include/linux/skbuff.h:1257 [inline] [] bt_skb_alloc include/net/bluetooth/bluetooth.h:469 [inline] [] vhci_get_user drivers/bluetooth/hci_vhci.c:391 [inline] [] vhci_write+0x5f/0x230 drivers/bluetooth/hci_vhci.c:511 [] call_write_iter include/linux/fs.h:2192 [inline] [] new_sync_write fs/read_write.c:491 [inline] [] vfs_write+0x42d/0x540 fs/read_write.c:578 [] ksys_write+0x9d/0x160 fs/read_write.c:631 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd ==================================== HCI core will uses hci_rx_work() to process frame, which is queued to the hdev->rx_q tail in hci_recv_frame() by HCI driver. Yet the problem is that, HCI core may not free the skb after handling ACL data packets. To be more specific, when start fragment does not contain the L2CAP length, HCI core just copies skb into conn->rx_skb and finishes frame process in l2cap_recv_acldata(), without freeing the skb, which triggers the above memory leak. This patch solves it by releasing the relative skb, after processing the above case in l2cap_recv_acldata(). Fixes: 4d7ea8ee90e4 ("Bluetooth: L2CAP: Fix handling fragmented length") Link: https://lore.kernel.org/all/0000000000000d0b1905e6aaef64@google.com/ Reported-and-tested-by: syzbot+8f819e36e01022991cfa@syzkaller.appspotmail.com Signed-off-by: Hawkins Jiawei Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9a32ce6349194..1fbe087d6ae4e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -8461,9 +8461,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) * expected length. */ if (skb->len < L2CAP_LEN_SIZE) { - if (l2cap_recv_frag(conn, skb, conn->mtu) < 0) - goto drop; - return; + l2cap_recv_frag(conn, skb, conn->mtu); + break; } len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE; @@ -8507,7 +8506,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) /* Header still could not be read just continue */ if (conn->rx_skb->len < L2CAP_LEN_SIZE) - return; + break; } if (skb->len > conn->rx_len) { From 5638d9ea9c01c77fc11693d48cf719bc7e88f224 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 17 Oct 2022 15:36:23 -0700 Subject: [PATCH 188/239] Bluetooth: hci_conn: Fix not restoring ISO buffer count on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When disconnecting an ISO link the controller may not generate HCI_EV_NUM_COMP_PKTS for unacked packets which needs to be restored in hci_conn_del otherwise the host would assume they are still in use and would not be able to use all the buffers available. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Luiz Augusto von Dentz Tested-by: Frédéric Danis --- net/bluetooth/hci_conn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1176bad5d8333..a6c12863a2532 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1067,10 +1067,21 @@ int hci_conn_del(struct hci_conn *conn) hdev->acl_cnt += conn->sent; } else { struct hci_conn *acl = conn->link; + if (acl) { acl->link = NULL; hci_conn_drop(acl); } + + /* Unacked ISO frames */ + if (conn->type == ISO_LINK) { + if (hdev->iso_pkts) + hdev->iso_cnt += conn->sent; + else if (hdev->le_pkts) + hdev->le_cnt += conn->sent; + else + hdev->acl_cnt += conn->sent; + } } if (conn->amp_mgr) From a3e5ce56f3d260f2ec8e5242c33f57e60ae9eba7 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Wed, 26 Oct 2022 15:08:24 -0400 Subject: [PATCH 189/239] drm/amdgpu: disable GFXOFF during compute for GFX11 Temporary workaround to fix issues observed in some compute applications when GFXOFF is enabled on GFX11. Signed-off-by: Graham Sider Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0561812aa0a43..5d9a34601a1ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -706,6 +706,13 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { + /* Temporary workaround to fix issues observed in some + * compute applications when GFXOFF is enabled on GFX11. + */ + if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { + pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); + amdgpu_gfx_off_ctrl(adev, idle); + } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); From 8fe8ce896c1cc29d6bfebb3c7b3cc948f72cd32c Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Wed, 26 Oct 2022 13:45:25 -0400 Subject: [PATCH 190/239] drm/amdgpu: Disable GPU reset on SRIOV before remove pci. The recent change brought a bug on SRIOV envrionment. It caused unloading amdgpu failed on Guest VM. The reason is that the VF FLR was requested while unloading amdgpu driver, but the VF FLR of SRIOV sequence is wrong while removing PCI device. For SRIOV, the guest driver should not trigger the whole XGMI hive to do the reset. Host driver control how the device been reset. Fixes: f5c7e7797060 ("drm/amdgpu: Adjust removal control flow for smu v13_0_2") Acked-by: Alex Deucher Reviewed-by: Shaoyun Liu Signed-off-by: Gavin Wan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3c9fecdd6b2f3..bf2d50c8c92ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2201,7 +2201,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) { + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + !amdgpu_sriov_vf(adev)) { bool need_to_reset_gpu = false; if (adev->gmc.xgmi.num_physical_nodes > 1) { From bad610c97c08eef3ed1fa769a8b08b94f95b451e Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 7 Oct 2022 12:43:26 -0400 Subject: [PATCH 191/239] drm/amd/display: Fix DCN32 DSC delay calculation [Why] DCN32 DSC delay calculation had an unintentional integer division, resulting in a mismatch against the DML spreadsheet. [How] Cast numerator to double before performing the division. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index ad66e241f9ae2..4a3e7a5d2758e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1746,7 +1746,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, } DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * - dml_ceil(DSCDelayRequirement_val / HActive, 1); + dml_ceil((double)DSCDelayRequirement_val / HActive, 1); DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; From ab007e5db5d3b8b8975c7eec69992ff38fe2a46c Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:36:32 -0400 Subject: [PATCH 192/239] drm/amd/display: Use forced DSC bpp in DML [Why] DSC config is calculated separately from DML calculations. DML should use these separately calculated DSC params. The issue is that the calculated bpp is not properly propagated into DML. [How] Correctly used forced_bpp value in DML. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 5b91660a6496b..60351b2891d07 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1627,7 +1627,7 @@ static void mode_support_configuration(struct vba_vars_st *v, && !mode_lib->vba.MSOOrODMSplitWithNonDPLink && !mode_lib->vba.NotEnoughLanesForMSO && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + //&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP && !mode_lib->vba.DSC422NativeNotSupported && !mode_lib->vba.MPCCombineMethodIncompatible && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 03924aed8d5c7..8e6585dab20ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -625,7 +625,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.skip_dio_check[mode_lib->vba.NumberOfActivePlanes] = dout->is_virtual; - if (!dout->dsc_enable) + if (dout->dsc_enable) mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp; else mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = 0.0; From 8dc323133d74518e3b5b07242e2b2f088799ea6e Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:40:08 -0400 Subject: [PATCH 193/239] drm/amd/display: Round up DST_after_scaler to nearest int [Why] The DST_after_scaler value that DML spreadsheet outputs is generally the driver value round up to the nearest int. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c index a1276f6b9581b..395ae8761980f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c @@ -291,8 +291,8 @@ void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0); - dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); + dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); // do some adjustment on the dst_after scaler to account for odm combine mode dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); From d5e0fb0d9dea545defb963ec1073bd9a1a8b5395 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:46:03 -0400 Subject: [PATCH 194/239] drm/amd/display: Add DSC delay factor workaround [Why] Certain 4K high refresh rate modes requiring DSC are exhibiting top of screen underflow corruption. Increasing the DSC delay by a factor of 6 percent stops the underflow for most use cases. [How] Multiply DSC delay requirement in DML by a factor. Add debug option to make this DSC delay factor configurable. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 +++- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 5 +++-- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 5 +++-- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h | 3 ++- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 5 ++++- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 3 +++ 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index bfc5474c0f4c9..737b221ca6890 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -852,6 +852,7 @@ struct dc_debug_options { bool enable_double_buffered_dsc_pg_support; bool enable_dp_dig_pixel_rate_div_policy; enum lttpr_mode lttpr_mode_override; + unsigned int dsc_delay_factor_wa_x1000; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 819de0f110126..f37c9a6b3b7e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2359,9 +2359,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - } + /* DML DSC delay factor workaround */ + dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 60351b2891d07..47ff0a8bd1052 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -367,7 +367,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k], - mode_lib->vba.PixelClockBackEnd[k]); + mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) @@ -2475,7 +2475,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], - mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k]); + mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k], + mode_lib->vba.ip.dsc_delay_factor_wa); } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 4a3e7a5d2758e..968924c491c18 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1726,7 +1726,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, enum output_format_class OutputFormat, enum output_encoder_class Output, double PixelClock, - double PixelClockBackEnd) + double PixelClockBackEnd, + double dsc_delay_factor_wa) { unsigned int DSCDelayRequirement_val; @@ -1764,7 +1765,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); #endif - return DSCDelayRequirement_val; + return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); } void dml32_CalculateSurfaceSizeInMall( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 55cead0d42374..2c3827546ac77 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -327,7 +327,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, enum output_format_class OutputFormat, enum output_encoder_class Output, double PixelClock, - double PixelClockBackEnd); + double PixelClockBackEnd, + double dsc_delay_factor_wa); void dml32_CalculateSurfaceSizeInMall( unsigned int NumberOfActiveSurfaces, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 7352f75144101..ec0486efab147 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -29,6 +29,7 @@ #include "dcn321_fpu.h" #include "dcn32/dcn32_resource.h" #include "dcn321/dcn321_resource.h" +#include "dml/dcn32/display_mode_vba_util_32.h" #define DCN3_2_DEFAULT_DET_SIZE 256 @@ -538,9 +539,11 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - } + /* DML DSC delay factor workaround */ + dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index f33a8879b05ad..d7be01ac07514 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -364,6 +364,9 @@ struct _vcs_dpi_ip_params_st { unsigned int max_num_dp2p0_outputs; unsigned int max_num_dp2p0_streams; unsigned int VBlankNomDefaultUS; + + /* DM workarounds */ + double dsc_delay_factor_wa; // TODO: Remove after implementing root cause fix }; struct _vcs_dpi_display_xfc_params_st { From 7461016c5706eb8c477752bf69e5c9f5a38f502b Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Fri, 6 May 2022 16:32:38 -0400 Subject: [PATCH 195/239] drm/amd/display: Investigate tool reported FCLK P-state deviations [Why] Fix for some of the tool reported modes for FCLK P-state deviations and UCLK P-state deviations that are coming from DSC terms and/or Scaling terms causing MinActiveFCLKChangeLatencySupported and MaxActiveDRAMClockChangeLatencySupported incorrectly calculated in DML for these configurations. Reviewed-by: Chaitanya Dhere Acked-by: Jasdeep Dhillon Acked-by: Alex Hung Signed-off-by: Nevenko Stupar Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 47ff0a8bd1052..3d184679f129e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -364,7 +364,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k], mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k], - mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], + mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa); From 6640f8e5adb69a0550fe1d224d3ac64c10f00eef Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Oct 2022 21:41:13 -0500 Subject: [PATCH 196/239] drm/amdkfd: update GFX11 CWSR trap handler With corresponding FW change fixes issue where triggering CWSR on a workgroup with waves in s_barrier wouldn't lead to a back-off and therefore cause a hang. Signed-off-by: Jay Cornwall Tested-by: Graham Sider Acked-by: Harish Kasiviswanathan Acked-by: Felix Kuehling Reviewed-by: Graham Sider Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 764 +++++++++--------- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 6 + 2 files changed, 389 insertions(+), 381 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index c7118843db051..0c4c5499bb5cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx11_hex[] = { - 0xbfa00001, 0xbfa0021e, + 0xbfa00001, 0xbfa00221, 0xb0804006, 0xb8f8f802, 0x9178ff78, 0x00020006, - 0xb8fbf803, 0xbf0d9f6d, - 0xbfa20006, 0x8b6eff78, - 0x00002000, 0xbfa10009, - 0x8b6eff6d, 0x00ff0000, - 0xbfa2001e, 0x8b6eff7b, - 0x00000400, 0xbfa20041, - 0xbf830010, 0xb8fbf803, - 0xbfa0fffa, 0x8b6eff7b, - 0x00000900, 0xbfa20015, - 0x8b6eff7b, 0x000071ff, - 0xbfa10008, 0x8b6fff7b, - 0x00007080, 0xbfa10001, - 0xbeee1287, 0xb8eff801, - 0x846e8c6e, 0x8b6e6f6e, - 0xbfa2000a, 0x8b6eff6d, - 0x00ff0000, 0xbfa20007, - 0xb8eef801, 0x8b6eff6e, - 0x00000800, 0xbfa20003, + 0xb8fbf803, 0xbf0d9e6d, + 0xbfa10001, 0xbfbd0000, + 0xbf0d9f6d, 0xbfa20006, + 0x8b6eff78, 0x00002000, + 0xbfa10009, 0x8b6eff6d, + 0x00ff0000, 0xbfa2001e, 0x8b6eff7b, 0x00000400, - 0xbfa20026, 0xbefa4d82, - 0xbf89fc07, 0x84fa887a, - 0xf4005bbd, 0xf8000010, - 0xbf89fc07, 0x846e976e, - 0x9177ff77, 0x00800000, - 0x8c776e77, 0xf4045bbd, - 0xf8000000, 0xbf89fc07, - 0xf4045ebd, 0xf8000008, - 0xbf89fc07, 0x8bee6e6e, - 0xbfa10001, 0xbe80486e, - 0x8b6eff6d, 0x01ff0000, - 0xbfa20005, 0x8c78ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbfa00005, - 0x8b6eff6d, 0x01000000, - 0xbfa20002, 0x806c846c, - 0x826d806d, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb978f802, - 0xbe804a6c, 0x8b6dff6d, - 0x0000ffff, 0xbefa0080, - 0xb97a0283, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbefe4d84, 0xbf89fc07, - 0x8b7aff7f, 0x04000000, - 0x847a857a, 0x8c6d7a6d, - 0xbefa007e, 0x8b7bff7f, - 0x0000ffff, 0xbefe00c1, - 0xbeff00c1, 0xdca6c000, - 0x007a0000, 0x7e000280, - 0xbefe007a, 0xbeff007b, - 0xb8fb02dc, 0x847b997b, - 0xb8fa3b05, 0x807a817a, - 0xbf0d997b, 0xbfa20002, - 0x847a897a, 0xbfa00001, - 0x847a8a7a, 0xb8fb1e06, - 0x847b8a7b, 0x807a7b7a, + 0xbfa20041, 0xbf830010, + 0xb8fbf803, 0xbfa0fffa, + 0x8b6eff7b, 0x00000900, + 0xbfa20015, 0x8b6eff7b, + 0x000071ff, 0xbfa10008, + 0x8b6fff7b, 0x00007080, + 0xbfa10001, 0xbeee1287, + 0xb8eff801, 0x846e8c6e, + 0x8b6e6f6e, 0xbfa2000a, + 0x8b6eff6d, 0x00ff0000, + 0xbfa20007, 0xb8eef801, + 0x8b6eff6e, 0x00000800, + 0xbfa20003, 0x8b6eff7b, + 0x00000400, 0xbfa20026, + 0xbefa4d82, 0xbf89fc07, + 0x84fa887a, 0xf4005bbd, + 0xf8000010, 0xbf89fc07, + 0x846e976e, 0x9177ff77, + 0x00800000, 0x8c776e77, + 0xf4045bbd, 0xf8000000, + 0xbf89fc07, 0xf4045ebd, + 0xf8000008, 0xbf89fc07, + 0x8bee6e6e, 0xbfa10001, + 0xbe80486e, 0x8b6eff6d, + 0x01ff0000, 0xbfa20005, + 0x8c78ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbfa00005, 0x8b6eff6d, + 0x01000000, 0xbfa20002, + 0x806c846c, 0x826d806d, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb978f802, 0xbe804a6c, + 0x8b6dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf89fc07, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xbefa007e, 0x8b7bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe00ff, 0x00003fff, - 0xbeff0080, 0xdca6c040, - 0x007a0000, 0xd760007a, - 0x00011d00, 0xd760007b, - 0x00011f00, 0xbefe007a, - 0xbeff007b, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007d, - 0xbef00080, 0xb8f302dc, - 0x84739973, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00002, - 0xbeff00c1, 0xbfa00009, + 0xbefe00c1, 0xbeff00c1, + 0xdca6c000, 0x007a0000, + 0x7e000280, 0xbefe007a, + 0xbeff007b, 0xb8fb02dc, + 0x847b997b, 0xb8fa3b05, + 0x807a817a, 0xbf0d997b, + 0xbfa20002, 0x847a897a, + 0xbfa00001, 0x847a8a7a, + 0xb8fb1e06, 0x847b8a7b, + 0x807a7b7a, 0x8b7bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xdca6c040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe007a, 0xbeff007b, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007d, 0xbef00080, + 0xb8f302dc, 0x84739973, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00002, 0xbeff00c1, + 0xbfa00009, 0xbef600ff, + 0x01000000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0xbfa00008, 0xbef600ff, 0x01000000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, - 0xbfa00008, 0xbef600ff, - 0x01000000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, - 0x701d0300, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0xbef600ff, - 0x01000000, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefd0080, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf803, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xbefe00ff, 0x0000ffff, - 0xbeff0080, 0xe0685000, - 0x701d0200, 0xbefe00c1, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, 0xb8f03b05, 0x80708170, 0xbf0d9973, 0xbfa20002, 0x84708970, 0xbfa00001, 0x84708a70, 0xb8fa1e06, 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0xbef600ff, 0x01000000, - 0xbef90080, 0xbefd0080, - 0xbf800000, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xd7610002, 0x0000fa6f, + 0x807d817d, 0xd7610002, + 0x0000fa78, 0x807d817d, + 0xb8faf803, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xd7610002, 0x0000fa7b, + 0x807d817d, 0xb8f1f801, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f814, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f815, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xbefe00ff, + 0x0000ffff, 0xbeff0080, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0xb8f03b05, + 0x80708170, 0xbf0d9973, + 0xbfa20002, 0x84708970, + 0xbfa00001, 0x84708a70, + 0xb8fa1e06, 0x847a8a7a, + 0x80707a70, 0xbef600ff, + 0x01000000, 0xbef90080, + 0xbefd0080, 0xbf800000, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xbe8c410c, 0xbe8e410e, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10006, 0xe0685000, - 0x701d0200, 0x8070ff70, - 0x00000080, 0xbef90080, - 0x7e040280, 0x807d907d, - 0xbf0aff7d, 0x00000060, - 0xbfa2ffbc, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbfa10006, + 0xe0685000, 0x701d0200, + 0x8070ff70, 0x00000080, + 0xbef90080, 0x7e040280, + 0x807d907d, 0xbf0aff7d, + 0x00000060, 0xbfa2ffbc, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xe0685000, - 0x701d0200, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0xbfbd0000, 0x8b7aff6d, - 0x80000000, 0xbfa10040, - 0x847b867b, 0x847b827b, - 0xbef6007b, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20012, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff4, - 0xbfa00011, 0xbe8300ff, - 0x00000100, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a7b7d, 0xbfa2fff4, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0685000, 0x701d0200, 0xbefe00c1, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0xbfbd0000, + 0x8b7aff6d, 0x80000000, + 0xbfa10040, 0x847b867b, + 0x847b827b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20017, 0xbef600ff, - 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10037, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xe0685000, 0x701d0000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, - 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffef, 0xbfa00025, + 0xbefd0080, 0xbfa20012, + 0xbe8300ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff4, 0xbfa00011, + 0xbe8300ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff4, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20017, 0xbef600ff, 0x01000000, 0xbefd0084, 0xbf0a7b7d, - 0xbfa10011, 0x7e008700, + 0xbfa10037, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0685000, - 0x701d0000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, + 0x701d0000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, 0x701d0300, 0x807d847d, - 0x8070ff70, 0x00000400, + 0x8070ff70, 0x00000200, 0xbf0a7b7d, 0xbfa2ffef, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000c, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, + 0xbfa00025, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xe0685000, 0x701d0000, - 0x807d817d, 0x8070ff70, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff8, 0xbfa00146, - 0xbef4007e, 0x8b75ff7f, - 0x0000ffff, 0x8c75ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x10807fac, - 0xb8f202dc, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003a, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa1002f, - 0x846f866f, 0x846f826f, - 0xbef6006f, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbefd0080, 0xbfa2000c, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff5, 0xbfa0000b, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7d, - 0xbfa2fff5, 0xbef80080, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffef, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000c, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xe0685000, + 0x701d0000, 0x807d817d, + 0x8070ff70, 0x00000080, + 0xbf0a7b7d, 0xbfa2fff8, + 0xbfa00146, 0xbef4007e, + 0x8b75ff7f, 0x0000ffff, + 0x8c75ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x10807fac, 0xb8f202dc, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003a, 0xbefe00c1, 0x857d9972, 0x8b7d817d, 0xbf06817d, 0xbfa20002, 0xbeff0080, 0xbfa00001, 0xbeff00c1, - 0xb8ef3b05, 0x806f816f, - 0x846f826f, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbfa20024, 0xbef600ff, - 0x01000000, 0xbeee0078, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa1002f, 0x846f866f, + 0x846f826f, 0xbef6006f, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10050, 0xe0505000, - 0x781d0000, 0xe0505080, - 0x781d0100, 0xe0505100, - 0x781d0200, 0xe0505180, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000200, 0xbf0a6f7d, - 0xbfa2ffee, 0xe0505000, - 0x6e1d0000, 0xe0505080, - 0x6e1d0100, 0xe0505100, - 0x6e1d0200, 0xe0505180, - 0x6e1d0300, 0xbf8903f7, - 0xbfa00034, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10012, 0xe0505000, - 0x781d0000, 0xe0505100, - 0x781d0100, 0xe0505200, - 0x781d0200, 0xe0505300, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000400, 0xbf0a6f7d, - 0xbfa2ffee, 0xb8ef1e06, - 0x8b6fc16f, 0xbfa1000e, - 0x846f836f, 0x806f7d6f, - 0xbefe00c1, 0xbeff0080, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbefd0080, + 0xbfa2000c, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbfa0000b, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbef80080, 0xbefe00c1, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00001, + 0xbeff00c1, 0xb8ef3b05, + 0x806f816f, 0x846f826f, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20024, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000200, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10050, 0xe0505000, 0x781d0000, + 0xe0505080, 0x781d0100, + 0xe0505100, 0x781d0200, + 0xe0505180, 0x781d0300, 0xbf8903f7, 0x7e008500, - 0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff7, 0xbeff00c1, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000200, + 0xbf0a6f7d, 0xbfa2ffee, 0xe0505000, 0x6e1d0000, - 0xe0505100, 0x6e1d0100, - 0xe0505200, 0x6e1d0200, - 0xe0505300, 0x6e1d0300, - 0xbf8903f7, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf428403a, 0xf0000000, - 0xbf89fc07, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf42c403a, 0xf0000000, - 0xbf89fc07, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf430403a, 0xf0000000, - 0xbf89fc07, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xbfbd0000, + 0xe0505080, 0x6e1d0100, + 0xe0505100, 0x6e1d0200, + 0xe0505180, 0x6e1d0300, + 0xbf8903f7, 0xbfa00034, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10012, + 0xe0505000, 0x781d0000, + 0xe0505100, 0x781d0100, + 0xe0505200, 0x781d0200, + 0xe0505300, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000400, + 0xbf0a6f7d, 0xbfa2ffee, + 0xb8ef1e06, 0x8b6fc16f, + 0xbfa1000e, 0x846f836f, + 0x806f7d6f, 0xbefe00c1, + 0xbeff0080, 0xe0505000, + 0x781d0000, 0xbf8903f7, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff7, + 0xbeff00c1, 0xe0505000, + 0x6e1d0000, 0xe0505100, + 0x6e1d0100, 0xe0505200, + 0x6e1d0200, 0xe0505300, + 0x6e1d0300, 0xbf8903f7, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, 0xbef600ff, 0x01000000, - 0xf4205bfa, 0xf0000000, - 0x80788478, 0xf4205b3a, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf428403a, + 0xf0000000, 0xbf89fc07, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf42c403a, + 0xf0000000, 0xbf89fc07, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf430403a, + 0xf0000000, 0xbf89fc07, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xbfbd0000, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef600ff, + 0x01000000, 0xf4205bfa, 0xf0000000, 0x80788478, - 0xf4205b7a, 0xf0000000, - 0x80788478, 0xf4205c3a, + 0xf4205b3a, 0xf0000000, + 0x80788478, 0xf4205b7a, 0xf0000000, 0x80788478, - 0xf4205c7a, 0xf0000000, - 0x80788478, 0xf4205eba, + 0xf4205c3a, 0xf0000000, + 0x80788478, 0xf4205c7a, 0xf0000000, 0x80788478, - 0xf4205efa, 0xf0000000, - 0x80788478, 0xf4205e7a, + 0xf4205eba, 0xf0000000, + 0x80788478, 0xf4205efa, 0xf0000000, 0x80788478, - 0xf4205cfa, 0xf0000000, - 0x80788478, 0xf4205bba, + 0xf4205e7a, 0xf0000000, + 0x80788478, 0xf4205cfa, 0xf0000000, 0x80788478, - 0xbf89fc07, 0xb96ef814, 0xf4205bba, 0xf0000000, 0x80788478, 0xbf89fc07, - 0xb96ef815, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0x8b6f7bff, 0x000003ff, - 0xb96f4803, 0x8b6f7bff, - 0xfffff800, 0x856f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4085c37, 0xf8000050, - 0xf4085d37, 0xf8000060, - 0xf4005e77, 0xf8000074, - 0xbf89fc07, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb8eef802, - 0xbf0d866e, 0xbfa20002, - 0xb97af802, 0xbe80486c, - 0xb97af802, 0xbe804a6c, - 0xbfb00000, 0xbf9f0000, + 0xb96ef814, 0xf4205bba, + 0xf0000000, 0x80788478, + 0xbf89fc07, 0xb96ef815, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0x8b6f7bff, + 0x000003ff, 0xb96f4803, + 0x8b6f7bff, 0xfffff800, + 0x856f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4085c37, + 0xf8000050, 0xf4085d37, + 0xf8000060, 0xf4005e77, + 0xf8000074, 0xbf89fc07, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb8eef802, 0xbf0d866e, + 0xbfa20002, 0xb97af802, + 0xbe80486c, 0xb97af802, + 0xbe804a6c, 0xbfb00000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 0f81670f6f9c6..8b92c33c2a7c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -186,6 +186,12 @@ L_SKIP_RESTORE: s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) #if SW_SA_TRAP + // If ttmp1[30] is set then issue s_barrier to unblock dependent waves. + s_bitcmp1_b32 s_save_pc_hi, 30 + s_cbranch_scc0 L_TRAP_NO_BARRIER + s_barrier + +L_TRAP_NO_BARRIER: // If ttmp1[31] is set then trap may occur early. // Spin wait until SAVECTX exception is raised. s_bitcmp1_b32 s_save_pc_hi, 31 From 711f8c3fb3db61897080468586b970c87c61d9e4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:32 -0700 Subject: [PATCH 197/239] Bluetooth: L2CAP: Fix accepting connection request for invalid SPSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Bluetooth spec states that the valid range for SPSM is from 0x0001-0x00ff so it is invalid to accept values outside of this range: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A page 1059: Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges CVE: CVE-2022-42896 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1fbe087d6ae4e..3eee915fb2456 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5813,6 +5813,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + chan = NULL; + goto response; + } + /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -6001,6 +6014,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, psm = req->psm; + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(&pdu, 0, sizeof(pdu)); From f937b758a188d6fd328a81367087eddbb2fce50f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:33 -0700 Subject: [PATCH 198/239] Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm l2cap_global_chan_by_psm shall not return fixed channels as they are not meant to be connected by (S)PSM. Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3eee915fb2456..4d8a1d862f6bb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1990,7 +1990,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) continue; - if (c->psm == psm) { + if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { int src_match, dst_match; int src_any, dst_any; From b1a2cd50c0357f243b7435a732b4e62ba3157a2e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:52 -0700 Subject: [PATCH 199/239] Bluetooth: L2CAP: Fix attempting to access uninitialized memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On l2cap_parse_conf_req the variable efs is only initialized if remote_efs has been set. CVE: CVE-2022-42895 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4d8a1d862f6bb..9c24947aa41ef 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3764,7 +3764,8 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); - if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + if (remote_efs && + test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); From 40e4eb324c59e11fcb927aa46742d28aba6ecb8a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 21:26:45 +0800 Subject: [PATCH 200/239] net: mdio: fix undefined behavior in bit shift for __mdiobus_register Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in drivers/net/phy/mdio_bus.c:586:27 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c __mdiobus_register+0x49d/0x4e0 fixed_mdio_bus_init+0xd8/0x12d do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Fixes: 4fd5f812c23c ("phylib: allow incremental scanning of an mii bus") Signed-off-by: Gaosheng Cui Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221031132645.168421-1-cuigaosheng1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index f82090bdf7ab8..1cd604cd1fa1b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -583,7 +583,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) } for (i = 0; i < PHY_MAX_ADDR; i++) { - if ((bus->phy_mask & (1 << i)) == 0) { + if ((bus->phy_mask & BIT(i)) == 0) { struct phy_device *phydev; phydev = mdiobus_scan(bus, i); From d6dd2fe71153f0ff748bf188bd4af076fe09a0a6 Mon Sep 17 00:00:00 2001 From: Nick Child Date: Mon, 31 Oct 2022 10:06:42 -0500 Subject: [PATCH 201/239] ibmvnic: Free rwi on reset success Free the rwi structure in the event that the last rwi in the list processed successfully. The logic in commit 4f408e1fa6e1 ("ibmvnic: retry reset if there are no other resets") introduces an issue that results in a 32 byte memory leak whenever the last rwi in the list gets processed. Fixes: 4f408e1fa6e1 ("ibmvnic: retry reset if there are no other resets") Signed-off-by: Nick Child Link: https://lore.kernel.org/r/20221031150642.13356-1-nnac123@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 65dbfbec487a3..9282381a438fe 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3007,19 +3007,19 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); /* - * If there is another reset queued, free the previous rwi - * and process the new reset even if previous reset failed - * (the previous reset could have failed because of a fail - * over for instance, so process the fail over). - * * If there are no resets queued and the previous reset failed, * the adapter would be in an undefined state. So retry the * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). */ - if (rwi) - kfree(tmprwi); - else if (rc) + if (!rwi && rc) rwi = tmprwi; + else + kfree(tmprwi); if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) From 2ae34111fe4eebb69986f6490015b57c88804373 Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Tue, 1 Nov 2022 14:02:18 +0800 Subject: [PATCH 202/239] stmmac: dwmac-loongson: fix invalid mdio_node In current code "plat->mdio_node" is always NULL, the mdio support is lost as there is no "mdio_bus_data". The original driver could work as the "mdio" variable is never set to false, which is described in commit ("stmmac: dwmac-loongson: fix uninitialized variable ......"). And after this commit merged, the "mdio" variable is always false, causing the mdio supoort logic lost. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Liu Peibao Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221101060218.16453-1-liupeibao@loongson.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 017dbbda0c1c4..79fa7870563b8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -51,7 +51,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id struct stmmac_resources res; struct device_node *np; int ret, i, phy_mode; - bool mdio = false; np = dev_of_node(&pdev->dev); @@ -69,12 +68,10 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat) return -ENOMEM; + plat->mdio_node = of_get_child_by_name(np, "mdio"); if (plat->mdio_node) { - dev_err(&pdev->dev, "Found MDIO subnode\n"); - mdio = true; - } + dev_info(&pdev->dev, "Found MDIO subnode\n"); - if (mdio) { plat->mdio_bus_data = devm_kzalloc(&pdev->dev, sizeof(*plat->mdio_bus_data), GFP_KERNEL); From 62ff373da2534534c55debe6c724c7fe14adb97f Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 17:37:22 +0800 Subject: [PATCH 203/239] net/smc: Fix possible leaked pernet namespace in smc_init() In smc_init(), register_pernet_subsys(&smc_net_stat_ops) is called without any error handling. If it fails, registering of &smc_net_ops won't be reverted. And if smc_nl_init() fails, &smc_net_stat_ops itself won't be reverted. This leaves wild ops in subsystem linkedlist and when another module tries to call register_pernet_operations() it triggers page fault: BUG: unable to handle page fault for address: fffffbfff81b964c RIP: 0010:register_pernet_operations+0x1b9/0x5f0 Call Trace: register_pernet_subsys+0x29/0x40 ebtables_init+0x58/0x1000 [ebtables] ... Fixes: 194730a9beb5 ("net/smc: Make SMC statistics network namespace aware") Signed-off-by: Chen Zhongjin Reviewed-by: Tony Lu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20221101093722.127223-1-chenzhongjin@huawei.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3ccbf3c201cd2..e12d4fa5aece6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3380,14 +3380,14 @@ static int __init smc_init(void) rc = register_pernet_subsys(&smc_net_stat_ops); if (rc) - return rc; + goto out_pernet_subsys; smc_ism_init(); smc_clc_init(); rc = smc_nl_init(); if (rc) - goto out_pernet_subsys; + goto out_pernet_subsys_stat; rc = smc_pnet_init(); if (rc) @@ -3480,6 +3480,8 @@ static int __init smc_init(void) smc_pnet_exit(); out_nl: smc_nl_exit(); +out_pernet_subsys_stat: + unregister_pernet_subsys(&smc_net_stat_ops); out_pernet_subsys: unregister_pernet_subsys(&smc_net_ops); From f8017317cb0b279b8ab98b0f3901a2e0ac880dad Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 20:15:52 +0800 Subject: [PATCH 204/239] net, neigh: Fix null-ptr-deref in neigh_table_clear() When IPv6 module gets initialized but hits an error in the middle, kenel panic with: KASAN: null-ptr-deref in range [0x0000000000000598-0x000000000000059f] CPU: 1 PID: 361 Comm: insmod Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:__neigh_ifdown.isra.0+0x24b/0x370 RSP: 0018:ffff888012677908 EFLAGS: 00000202 ... Call Trace: neigh_table_clear+0x94/0x2d0 ndisc_cleanup+0x27/0x40 [ipv6] inet6_init+0x21c/0x2cb [ipv6] do_one_initcall+0xd3/0x4d0 do_init_module+0x1ae/0x670 ... Kernel panic - not syncing: Fatal exception When ipv6 initialization fails, it will try to cleanup and calls: neigh_table_clear() neigh_ifdown(tbl, NULL) pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev == NULL)) # dev_net(NULL) triggers null-ptr-deref. Fix it by passing NULL to pneigh_queue_purge() in neigh_ifdown() if dev is NULL, to make kernel not panic immediately. Fixes: 66ba215cb513 ("neigh: fix possible DoS due to net iface start/stop loop") Signed-off-by: Chen Zhongjin Reviewed-by: Eric Dumazet Reviewed-by: Denis V. Lunev Link: https://lore.kernel.org/r/20221101121552.21890-1-chenzhongjin@huawei.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3c4786b999070..a77a85e357e0a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -409,7 +409,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; From 628ac04a75ed5ff13647e725f40192da22ef2be8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 1 Nov 2022 20:57:53 +0200 Subject: [PATCH 205/239] bridge: Fix flushing of dynamic FDB entries The following commands should result in all the dynamic FDB entries being flushed, but instead all the non-local (non-permanent) entries are flushed: # bridge fdb add 00:aa:bb:cc:dd:ee dev dummy1 master static # bridge fdb add 00:11:22:33:44:55 dev dummy1 master dynamic # ip link set dev br0 type bridge fdb_flush # bridge fdb show brport dummy1 00:00:00:00:00:01 master br0 permanent 33:33:00:00:00:01 self permanent 01:00:5e:00:00:01 self permanent This is because br_fdb_flush() works with FDB flags and not the corresponding enumerator values. Fix by passing the FDB flag instead. After the fix: # bridge fdb add 00:aa:bb:cc:dd:ee dev dummy1 master static # bridge fdb add 00:11:22:33:44:55 dev dummy1 master dynamic # ip link set dev br0 type bridge fdb_flush # bridge fdb show brport dummy1 00:aa:bb:cc:dd:ee master br0 static 00:00:00:00:00:01 master br0 permanent 33:33:00:00:00:01 self permanent 01:00:5e:00:00:01 self permanent Fixes: 1f78ee14eeac ("net: bridge: fdb: add support for fine-grained flushing") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221101185753.2120691-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 2 +- net/bridge/br_sysfs_br.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5aeb3646e74c7..d087fd4c784ac 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1332,7 +1332,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_FDB_FLUSH]) { struct net_bridge_fdb_flush_desc desc = { - .flags_mask = BR_FDB_STATIC + .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 612e367fff20d..ea733542244c7 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -345,7 +345,7 @@ static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { struct net_bridge_fdb_flush_desc desc = { - .flags_mask = BR_FDB_STATIC + .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); From 768b3c745fe5789f2430bdab02f35a9ad1148d97 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 2 Nov 2022 10:06:10 +0800 Subject: [PATCH 206/239] ipv6: fix WARNING in ip6_route_net_exit_late() During the initialization of ip6_route_net_init_late(), if file ipv6_route or rt6_stats fails to be created, the initialization is successful by default. Therefore, the ipv6_route or rt6_stats file doesn't be found during the remove in ip6_route_net_exit_late(). It will cause WRNING. The following is the stack information: name 'rt6_stats' WARNING: CPU: 0 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 PKRU: 55555554 Call Trace: ops_exit_list+0xb0/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: cdb1876192db ("[NETNS][IPV6] route6 - create route6 proc files for the namespace") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221102020610.351330-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 69252eb462b2d..2f355f0ec32ac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6555,10 +6555,16 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, - sizeof(struct ipv6_route_iter)); - proc_create_net_single("rt6_stats", 0444, net->proc_net, - rt6_stats_seq_show, NULL); + if (!proc_create_net("ipv6_route", 0, net->proc_net, + &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter))) + return -ENOMEM; + + if (!proc_create_net_single("rt6_stats", 0444, net->proc_net, + rt6_stats_seq_show, NULL)) { + remove_proc_entry("ipv6_route", net->proc_net); + return -ENOMEM; + } #endif return 0; } From 7433632c9ff68a991bd0bc38cabf354e9d2de410 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 1 Nov 2022 19:10:09 -0400 Subject: [PATCH 207/239] ring-buffer: Check for NULL cpu_buffer in ring_buffer_wake_waiters() On some machines the number of listed CPUs may be bigger than the actual CPUs that exist. The tracing subsystem allocates a per_cpu directory with access to the per CPU ring buffer via a cpuX file. But to save space, the ring buffer will only allocate buffers for online CPUs, even though the CPU array will be as big as the nr_cpu_ids. With the addition of waking waiters on the ring buffer when closing the file, the ring_buffer_wake_waiters() now needs to make sure that the buffer is allocated (with the irq_work allocated with it) before trying to wake waiters, as it will cause a NULL pointer dereference. While debugging this, I added a NULL check for the buffer itself (which is OK to do), and also NULL pointer checks against buffer->buffers (which is not fine, and will WARN) as well as making sure the CPU number passed in is within the nr_cpu_ids (which is also not fine if it isn't). Link: https://lore.kernel.org/all/87h6zklb6n.wl-tiwai@suse.de/ Link: https://lore.kernel.org/all/CAM6Wdxc0KRJMXVAA0Y=u6Jh2V=uWB-_Fn6M4xRuNppfXzL1mUg@mail.gmail.com/ Link: https://lkml.kernel.org/linux-trace-kernel/20221101191009.1e7378c8@rorschach.local.home Cc: stable@vger.kernel.org Cc: Steven Noonan Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1204705 Reported-by: Takashi Iwai Reported-by: Roland Ruckerbauer Fixes: f3ddb74ad079 ("tracing: Wake up ring buffer waiters on closing of the file") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 199759c735196..9712083832f41 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -937,6 +937,9 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; + if (!buffer) + return; + if (cpu == RING_BUFFER_ALL_CPUS) { /* Wake up individual ones too. One level recursion */ @@ -945,7 +948,15 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &buffer->irq_work; } else { + if (WARN_ON_ONCE(!buffer->buffers)) + return; + if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) + return; + cpu_buffer = buffer->buffers[cpu]; + /* The CPU buffer may not have been initialized yet */ + if (!cpu_buffer) + return; rbwork = &cpu_buffer->irq_work; } From 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 3 Nov 2022 11:10:10 +0800 Subject: [PATCH 208/239] ftrace: Fix use-after-free for dynamic ftrace_ops KASAN reported a use-after-free with ftrace ops [1]. It was found from vmcore that perf had registered two ops with the same content successively, both dynamic. After unregistering the second ops, a use-after-free occurred. In ftrace_shutdown(), when the second ops is unregistered, the FTRACE_UPDATE_CALLS command is not set because there is another enabled ops with the same content. Also, both ops are dynamic and the ftrace callback function is ftrace_ops_list_func, so the FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value of 'command' will be 0 and ftrace_shutdown() will skip the rcu synchronization. However, ftrace may be activated. When the ops is released, another CPU may be accessing the ops. Add the missing synchronization to fix this problem. [1] BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468 CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132 show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b4/0x248 lib/dump_stack.c:118 print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387 __kasan_report mm/kasan/report.c:547 [inline] kasan_report+0x118/0x210 mm/kasan/report.c:564 check_memory_region_inline mm/kasan/generic.c:187 [inline] __asan_load8+0x98/0xc0 mm/kasan/generic.c:253 __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 ftrace_graph_call+0x0/0x4 __might_sleep+0x8/0x100 include/linux/perf_event.h:1170 __might_fault mm/memory.c:5183 [inline] __might_fault+0x58/0x70 mm/memory.c:5171 do_strncpy_from_user lib/strncpy_from_user.c:41 [inline] strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139 getname_flags+0xb0/0x31c fs/namei.c:149 getname+0x2c/0x40 fs/namei.c:209 [...] Allocated by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:479 [inline] __kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449 kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493 kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950 kmalloc include/linux/slab.h:563 [inline] kzalloc include/linux/slab.h:675 [inline] perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 __arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723 [...] Freed by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track+0x24/0x34 mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358 __kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437 __kasan_slab_free mm/kasan/common.c:445 [inline] kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446 slab_free_hook mm/slub.c:1569 [inline] slab_free_freelist_hook mm/slub.c:1608 [inline] slab_free mm/slub.c:3179 [inline] kfree+0x12c/0xc10 mm/slub.c:4176 perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 [...] Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei1@huawei.com Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled") Cc: stable@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Huafei Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fbf2543111c05..7dc023641bf10 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3028,18 +3028,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) { - /* - * If these are dynamic or per_cpu ops, they still - * need their data freed. Since, function tracing is - * not currently active, we can just free them - * without synchronizing all CPUs. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - goto free_ops; - - return 0; - } + if (!command || !ftrace_enabled) + goto out; /* * If the ops uses a trampoline, then it needs to be @@ -3076,6 +3066,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; +out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -3103,7 +3094,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) if (IS_ENABLED(CONFIG_PREEMPTION)) synchronize_rcu_tasks(); - free_ops: ftrace_trampoline_free(ops); } From 354d8a4b165697e6da9585b3f651d87735f30415 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 20 Oct 2022 13:21:43 +0200 Subject: [PATCH 209/239] x86/xen: silence smatch warning in pmu_msr_chk_emulated() Commit 8714f7bcd3c2 ("xen/pv: add fault recovery control to pmu msr accesses") introduced code resulting in a warning issued by the smatch static checker, claiming to use an uninitialized variable. This is a false positive, but work around the warning nevertheless. Fixes: 8714f7bcd3c2 ("xen/pv: add fault recovery control to pmu msr accesses") Reported-by: Dan Carpenter Signed-off-by: Juergen Gross --- arch/x86/xen/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 68aff13828728..246d67dab5109 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -302,7 +302,7 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, bool *emul) { - int type, index; + int type, index = 0; if (is_amd_pmu_msr(msr)) *emul = xen_amd_pmu_emulate(msr, val, is_read); From 4bff677b30156435afa2cc4c3601b542b4ddd439 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 20 Oct 2022 13:25:12 +0200 Subject: [PATCH 210/239] x86/xen: simplify sysenter and syscall setup xen_enable_sysenter() and xen_enable_syscall() can be simplified a lot. While at it, switch to use cpu_feature_enabled() instead of boot_cpu_has(). Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index cfa99e8f054be..4f4309500559a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -910,17 +910,9 @@ static int register_callback(unsigned type, const void *func) void xen_enable_sysenter(void) { - int ret; - unsigned sysenter_feature; - - sysenter_feature = X86_FEATURE_SYSENTER32; - - if (!boot_cpu_has(sysenter_feature)) - return; - - ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); - if(ret != 0) - setup_clear_cpu_cap(sysenter_feature); + if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && + register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); } void xen_enable_syscall(void) @@ -934,12 +926,9 @@ void xen_enable_syscall(void) mechanism for syscalls. */ } - if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { - ret = register_callback(CALLBACKTYPE_syscall32, - xen_entry_SYSCALL_compat); - if (ret != 0) - setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); - } + if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && + register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } static void __init xen_pvmmu_arch_setup(void) From cf6ff0df0fd123493e57278a1bd4414a97511a34 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 31 Oct 2022 19:17:05 -0700 Subject: [PATCH 211/239] vsock: remove the unused 'wait' in vsock_connectible_recvmsg() Remove the unused variable introduced by 19c1b90e1979. Fixes: 19c1b90e1979 ("af_vsock: separate receive data loop") Signed-off-by: Dexuan Cui Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ee418701cdee9..d258fd43092ef 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2092,8 +2092,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, const struct vsock_transport *transport; int err; - DEFINE_WAIT(wait); - sk = sock->sk; vsk = vsock_sk(sk); err = 0; From 466a85336fee6e3b35eb97b8405a28302fd25809 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 31 Oct 2022 19:17:06 -0700 Subject: [PATCH 212/239] vsock: fix possible infinite sleep in vsock_connectible_wait_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently vsock_connectible_has_data() may miss a wakeup operation between vsock_connectible_has_data() == 0 and the prepare_to_wait(). Fix the race by adding the process to the wait queue before checking vsock_connectible_has_data(). Fixes: b3f7fd54881b ("af_vsock: separate wait data loop") Signed-off-by: Dexuan Cui Reviewed-by: Stefano Garzarella Reported-by: Frédéric Dalleau Tested-by: Frédéric Dalleau Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d258fd43092ef..884eca7f6743a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1905,8 +1905,11 @@ static int vsock_connectible_wait_data(struct sock *sk, err = 0; transport = vsk->transport; - while ((data = vsock_connectible_has_data(vsk)) == 0) { + while (1) { prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); + data = vsock_connectible_has_data(vsk); + if (data != 0) + break; if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) || From 23715a26c8d812912a70c6ac1ce67af649b95914 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 28 Oct 2022 16:39:14 +0200 Subject: [PATCH 213/239] arm64: efi: Recover from synchronous exceptions occurring in firmware Unlike x86, which has machinery to deal with page faults that occur during the execution of EFI runtime services, arm64 has nothing like that, and a synchronous exception raised by firmware code brings down the whole system. With more EFI based systems appearing that were not built to run Linux (such as the Windows-on-ARM laptops based on Qualcomm SOCs), as well as the introduction of PRM (platform specific firmware routines that are callable just like EFI runtime services), we are more likely to run into issues of this sort, and it is much more likely that we can identify and work around such issues if they don't bring down the system entirely. Since we already use a EFI runtime services call wrapper in assembler, we can quite easily add some code that captures the execution state at the point where the call is made, allowing us to revert to this state and proceed execution if the call triggered a synchronous exception. Given that the kernel and the firmware don't share any data structures that could end up in an indeterminate state, we can happily continue running, as long as we mark the EFI runtime services as unavailable from that point on. Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 8 ++++++++ arch/arm64/kernel/efi-rt-wrapper.S | 33 ++++++++++++++++++++++++++++-- arch/arm64/kernel/efi.c | 26 +++++++++++++++++++++++ arch/arm64/mm/fault.c | 4 ++++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 439e2bc5d5d8b..d6cf535d8352b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,16 @@ #ifdef CONFIG_EFI extern void efi_init(void); + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() + +static inline +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + return false; +} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 75691a2641c1c..67babd5f04c27 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-32]! + stp x29, x30, [sp, #-112]! mov x29, sp /* @@ -16,6 +16,20 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] + /* + * Preserve all callee saved registers and record the stack pointer + * value in a per-CPU variable so we can recover from synchronous + * exceptions occurring while running the firmware routines. + */ + stp x19, x20, [sp, #32] + stp x21, x22, [sp, #48] + stp x23, x24, [sp, #64] + stp x25, x26, [sp, #80] + stp x27, x28, [sp, #96] + + adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + str x29, [x8] + /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -31,7 +45,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #32 + ldp x29, x30, [sp], #112 b.ne 0f ret 0: @@ -45,3 +59,18 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) + +SYM_FUNC_START(__efi_rt_asm_recover) + ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + mov sp, x8 + + ldp x0, x18, [sp, #16] + ldp x19, x20, [sp, #32] + ldp x21, x22, [sp, #48] + ldp x23, x24, [sp, #64] + ldp x25, x26, [sp, #80] + ldp x27, x28, [sp, #96] + ldp x29, x30, [sp], #112 + + b efi_handle_runtime_exception +SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index e1be6c429810d..8d36e66a6e64c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,6 +9,7 @@ #include #include +#include #include @@ -128,3 +129,28 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } + +asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); + +asmlinkage efi_status_t __efi_rt_asm_recover(void); + +asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) +{ + pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return EFI_ABORTED; +} + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + /* Check whether the exception occurred while running the firmware */ + if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) + return false; + + pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + dump_stack(); + + regs->pc = (u64)__efi_rt_asm_recover; + return true; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045b..3e9cf9826417a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -391,6 +392,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } + if (efi_runtime_fixup_exception(regs, msg)) + return; + die_kernel_fault(msg, addr, esr, regs); } From 85f1506337f0c79a4955edfeee86a18628e3735f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 3 Nov 2022 13:52:32 +0530 Subject: [PATCH 214/239] arm64: cpufeature: Fix the visibility of compat hwcaps Commit 237405ebef58 ("arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space") forced the hwcaps to use sanitised user-space view of the id registers. However, the ID register structures used to select few compat cpufeatures (vfp, crc32, ...) are masked and hence such hwcaps do not appear in /proc/cpuinfo anymore for PER_LINUX32 personality. Add the ID register structures explicitly and set the relevant entry as visible. As these ID registers are now of type visible so make them available in 64-bit userspace by making necessary changes in register emulation logic and documentation. While at it, update the comment for structure ftr_generic_32bits[] which lists the ID register that use it. Fixes: 237405ebef58 ("arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space") Cc: Suzuki K Poulose Reviewed-by: James Morse Signed-off-by: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20221103082232.19189-1-amit.kachhap@arm.com Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 38 ++++++++++++++++- arch/arm64/kernel/cpufeature.c | 42 +++++++++++++++---- 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 04ba83e1965fb..c7adc7897df60 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -92,7 +92,7 @@ operation if the source belongs to the supported system register space. The infrastructure emulates only the following system register space:: - Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7 + Op0=3, Op1=0, CRn=0, CRm=0,2,3,4,5,6,7 (See Table C5-6 'System instruction encodings for non-Debug System register accesses' in ARMv8 ARM DDI 0487A.h, for the list of @@ -293,6 +293,42 @@ infrastructure: | WFXT | [3-0] | y | +------------------------------+---------+---------+ + 10) MVFR0_EL1 - AArch32 Media and VFP Feature Register 0 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | FPDP | [11-8] | y | + +------------------------------+---------+---------+ + + 11) MVFR1_EL1 - AArch32 Media and VFP Feature Register 1 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | SIMDFMAC | [31-28] | y | + +------------------------------+---------+---------+ + | SIMDSP | [19-16] | y | + +------------------------------+---------+---------+ + | SIMDInt | [15-12] | y | + +------------------------------+---------+---------+ + | SIMDLS | [11-8] | y | + +------------------------------+---------+---------+ + + 12) ID_ISAR5_EL1 - AArch32 Instruction Set Attribute Register 5 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | CRC32 | [19-16] | y | + +------------------------------+---------+---------+ + | SHA2 | [15-12] | y | + +------------------------------+---------+---------+ + | SHA1 | [11-8] | y | + +------------------------------+---------+---------+ + | AES | [7-4] | y | + +------------------------------+---------+---------+ + Appendix I: Example ------------------- diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6062454a90674..b3f37e2209ad3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -428,6 +428,30 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_mvfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPROUND_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSHVEC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSQRT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDIVIDE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPTRAP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_SIMD_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static const struct arm64_ftr_bits ftr_mvfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDFMAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDINT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDLS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPDNAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPFTZ_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0), @@ -458,10 +482,10 @@ static const struct arm64_ftr_bits ftr_id_isar0[] = { static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -574,7 +598,7 @@ static const struct arm64_ftr_bits ftr_smcr[] = { * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of * 0. Covers the following 32bit registers: - * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + * id_isar[1-3], id_mmfr[1-3] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), @@ -645,8 +669,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), /* Op1 = 0, CRn = 0, CRm = 3 */ - ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_mvfr0), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_mvfr1), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), @@ -3339,7 +3363,7 @@ static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *c /* * We emulate only the following system register space. - * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] + * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 2 - 7] * See Table C5-6 System instruction encodings for System register accesses, * ARMv8 ARM(ARM DDI 0487A.f) for more details. */ @@ -3349,7 +3373,7 @@ static inline bool __attribute_const__ is_emulated(u32 id) sys_reg_CRn(id) == 0x0 && sys_reg_Op1(id) == 0x0 && (sys_reg_CRm(id) == 0 || - ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7)))); + ((sys_reg_CRm(id) >= 2) && (sys_reg_CRm(id) <= 7)))); } /* From 4f1aa35f1fb7d51b125487c835982af792697ecb Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Tue, 27 Sep 2022 15:02:47 +0800 Subject: [PATCH 215/239] cxl/pmem: Use size_add() against integer overflow "struct_size() + n" may cause a integer overflow, use size_add() to handle it. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20220927070247.23148-1-yuzhe@nfschina.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 115a7b79f3439..0bac05d804bc5 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -148,7 +148,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, return -EINVAL; /* 4-byte status follows the input data in the payload */ - if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len) + if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len) return -EINVAL; set_lsa = From 4a6f316d6855a434f56dbbeba05e14c01acde8f8 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 4 Nov 2022 08:49:31 +0900 Subject: [PATCH 216/239] kprobe: reverse kp->flags when arm_kprobe failed In aggregate kprobe case, when arm_kprobe failed, we need set the kp->flags with KPROBE_FLAG_DISABLED again. If not, the 'kp' kprobe will been considered as enabled but it actually not enabled. Link: https://lore.kernel.org/all/20220902155820.34755-1-liq3ea@163.com/ Fixes: 12310e343755 ("kprobes: Propagate error from arm_kprobe_ftrace()") Cc: stable@vger.kernel.org Signed-off-by: Li Qiang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3220b0a2fb4a3..cd9f5a66a6909 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2429,8 +2429,11 @@ int enable_kprobe(struct kprobe *kp) if (!kprobes_all_disarmed && kprobe_disabled(p)) { p->flags &= ~KPROBE_FLAG_DISABLED; ret = arm_kprobe(p); - if (ret) + if (ret) { p->flags |= KPROBE_FLAG_DISABLED; + if (p != kp) + kp->flags |= KPROBE_FLAG_DISABLED; + } } out: mutex_unlock(&kprobe_mutex); From d05ea35e7eea14d32f29fd688d3daeb9089de1a5 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Tue, 25 Oct 2022 00:12:08 -0300 Subject: [PATCH 217/239] fprobe: Check rethook_alloc() return in rethook initialization Check if fp->rethook succeeded to be allocated. Otherwise, if rethook_alloc() fails, then we end up dereferencing a NULL pointer in rethook_add_node(). Link: https://lore.kernel.org/all/20221025031209.954836-1-rafaelmendsr@gmail.com/ Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support") Cc: stable@vger.kernel.org Signed-off-by: Rafael Mendonca Acked-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index aac63ca9c3d1e..71614b2a67ff3 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -141,6 +141,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) return -E2BIG; fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler); + if (!fp->rethook) + return -ENOMEM; for (i = 0; i < size; i++) { struct fprobe_rethook_node *node; From 61b304b73ab4b48b1cd7796efe42a570e2a0e0fc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 23 Oct 2022 11:11:43 +0900 Subject: [PATCH 218/239] tracing/fprobe: Fix to check whether fprobe is registered correctly Since commit ab51e15d535e ("fprobe: Introduce FPROBE_FL_KPROBE_SHARED flag for fprobe") introduced fprobe_kprobe_handler() for fprobe::ops::func, unregister_fprobe() fails to unregister the registered if user specifies FPROBE_FL_KPROBE_SHARED flag. Moreover, __register_ftrace_function() is possible to change the ftrace_ops::func, thus we have to check fprobe::ops::saved_func instead. To check it correctly, it should confirm the fprobe::ops::saved_func is either fprobe_handler() or fprobe_kprobe_handler(). Link: https://lore.kernel.org/all/166677683946.1459107.15997653945538644683.stgit@devnote3/ Fixes: cad9931f64dc ("fprobe: Add ftrace based probe APIs") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 71614b2a67ff3..e8143e3680744 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -303,7 +303,8 @@ int unregister_fprobe(struct fprobe *fp) { int ret; - if (!fp || fp->ops.func != fprobe_handler) + if (!fp || (fp->ops.saved_func != fprobe_handler && + fp->ops.saved_func != fprobe_kprobe_handler)) return -EINVAL; /* From 66f0919c953ef7b55e5ab94389a013da2ce80a2c Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 2 Nov 2022 15:29:54 +0800 Subject: [PATCH 219/239] tracing: kprobe: Fix memory leak in test_gen_kprobe/kretprobe_cmd() test_gen_kprobe_cmd() only free buf in fail path, hence buf will leak when there is no failure. Move kfree(buf) from fail path to common path to prevent the memleak. The same reason and solution in test_gen_kretprobe_cmd(). unreferenced object 0xffff888143b14000 (size 2048): comm "insmod", pid 52490, jiffies 4301890980 (age 40.553s) hex dump (first 32 bytes): 70 3a 6b 70 72 6f 62 65 73 2f 67 65 6e 5f 6b 70 p:kprobes/gen_kp 72 6f 62 65 5f 74 65 73 74 20 64 6f 5f 73 79 73 robe_test do_sys backtrace: [<000000006d7b836b>] kmalloc_trace+0x27/0xa0 [<0000000009528b5b>] 0xffffffffa059006f [<000000008408b580>] do_one_initcall+0x87/0x2a0 [<00000000c4980a7e>] do_init_module+0xdf/0x320 [<00000000d775aad0>] load_module+0x3006/0x3390 [<00000000e9a74b80>] __do_sys_finit_module+0x113/0x1b0 [<000000003726480d>] do_syscall_64+0x35/0x80 [<000000003441e93b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lore.kernel.org/all/20221102072954.26555-1-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Cc: stable@vger.kernel.org Signed-off-by: Shang XiaoJing Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/kprobe_event_gen_test.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 80e04a1e19772..d81f7c51025c7 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -100,20 +100,20 @@ static int __init test_gen_kprobe_cmd(void) KPROBE_GEN_TEST_FUNC, KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1); if (ret) - goto free; + goto out; /* Use kprobe_event_add_fields to add the rest of the fields */ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kprobe_test event file. We need to prevent @@ -136,13 +136,11 @@ static int __init test_gen_kprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); - free: - kfree(buf); - goto out; } @@ -170,14 +168,14 @@ static int __init test_gen_kretprobe_cmd(void) KPROBE_GEN_TEST_FUNC, "$retval"); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kretprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kretprobe_test event file. We need to @@ -201,13 +199,11 @@ static int __init test_gen_kretprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); - free: - kfree(buf); - goto out; } From 7beade0dd41d42d797ccb7791b134a77fcebf35b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 3 Nov 2022 13:33:10 -0700 Subject: [PATCH 220/239] x86/cpu: Add several Intel server CPU model numbers These servers are all on the public versions of the roadmap. The model numbers for Grand Ridge, Granite Rapids, and Sierra Forest were included in the September 2022 edition of the Instruction Set Extensions document. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20221103203310.5058-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5d75fe2293421..347707d459c67 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -107,6 +107,11 @@ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ +#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF + +#define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_FAM6_GRANITERAPIDS_D 0xAE + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_N 0xBE @@ -118,7 +123,7 @@ #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA -/* "Small Core" Processors (Atom) */ +/* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ @@ -145,6 +150,10 @@ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ +#define INTEL_FAM6_SIERRAFOREST_X 0xAF + +#define INTEL_FAM6_GRANDRIDGE 0xB6 + /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ From a90accb358ae33ea982a35595573f7a045993f8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:24 -0700 Subject: [PATCH 221/239] cxl/region: Fix region HPA ordering validation Some regions may not have any address space allocated. Skip them when validating HPA order otherwise a crash like the following may result: devm_cxl_add_region: cxl_acpi cxl_acpi.0: decoder3.4: created region9 BUG: kernel NULL pointer dereference, address: 0000000000000000 [..] RIP: 0010:store_targetN+0x655/0x1740 [cxl_core] [..] Call Trace: kernfs_fop_write_iter+0x144/0x200 vfs_write+0x24a/0x4d0 ksys_write+0x69/0xf0 do_syscall_64+0x3a/0x90 store_targetN+0x655/0x1740: alloc_region_ref at drivers/cxl/core/region.c:676 (inlined by) cxl_port_attach_region at drivers/cxl/core/region.c:850 (inlined by) cxl_region_attach at drivers/cxl/core/region.c:1290 (inlined by) attach_target at drivers/cxl/core/region.c:1410 (inlined by) store_targetN at drivers/cxl/core/region.c:1453 Cc: Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/166752182461.947915.497032805239915067.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index bb6f4fc84a3ff..d26ca7a6beaee 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -658,6 +658,9 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, xa_for_each(&port->regions, index, iter) { struct cxl_region_params *ip = &iter->region->params; + if (!ip->res) + continue; + if (ip->res->start > p->res->start) { dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n", From 0d9e734018d70cecf79e2e4c6082167160a0f13f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:30 -0700 Subject: [PATCH 222/239] cxl/region: Fix cxl_region leak, cleanup targets at region delete When a region is deleted any targets that have been previously assigned to that region hold references to it. Trigger those references to drop by detaching all targets at unregister_region() time. Otherwise that region object will leak as userspace has lost the ability to detach targets once region sysfs is torn down. Cc: Fixes: b9686e8c8e39 ("cxl/region: Enable the assignment of endpoint decoders to regions") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183055.947915.17681995648556534844.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d26ca7a6beaee..c52465e09f268 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1557,8 +1557,19 @@ static struct cxl_region *to_cxl_region(struct device *dev) static void unregister_region(void *dev) { struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int i; device_del(dev); + + /* + * Now that region sysfs is shutdown, the parameter block is now + * read-only, so no need to hold the region rwsem to access the + * region parameters. + */ + for (i = 0; i < p->interleave_ways; i++) + detach_target(cxlr, i); + cxl_region_iomem_release(cxlr); put_device(dev); } From 4d07ae22e79ebc2d7528bbc69daa53b86981cb3a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:36 -0700 Subject: [PATCH 223/239] cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak When a cxl_nvdimm object goes through a ->remove() event (device physically removed, nvdimm-bridge disabled, or nvdimm device disabled), then any associated regions must also be disabled. As highlighted by the cxl-create-region.sh test [1], a single device may host multiple regions, but the driver was only tracking one region at a time. This leads to a situation where only the last enabled region per nvdimm device is cleaned up properly. Other regions are leaked, and this also causes cxl_memdev reference leaks. Fix the tracking by allowing cxl_nvdimm objects to track multiple region associations. Cc: Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] Reported-by: Vishal Verma Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 2 + drivers/cxl/cxl.h | 2 +- drivers/cxl/pmem.c | 101 ++++++++++++++++++++++++++-------------- 3 files changed, 68 insertions(+), 37 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 1d12a8206444e..36aa5070d9024 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + xa_destroy(&cxl_nvd->pmem_regions); kfree(cxl_nvd); } @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; + xa_init(&cxl_nvd->pmem_regions); device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f680450f0b16c..1164ad49f3d3a 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -423,7 +423,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; struct cxl_nvdimm_bridge *bridge; - struct cxl_pmem_region *region; + struct xarray pmem_regions; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 0bac05d804bc5..4c627d67281a1 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; struct cxl_pmem_region *cxlr_pmem; + unsigned long index; device_lock(&cxl_nvb->dev); - cxlr_pmem = cxl_nvd->region; dev_set_drvdata(&cxl_nvd->dev, NULL); - cxl_nvd->region = NULL; - device_unlock(&cxl_nvb->dev); + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { + get_device(&cxlr_pmem->dev); + device_unlock(&cxl_nvb->dev); - if (cxlr_pmem) { device_release_driver(&cxlr_pmem->dev); put_device(&cxlr_pmem->dev); + + device_lock(&cxl_nvb->dev); } + device_unlock(&cxl_nvb->dev); nvdimm_delete(nvdimm); cxl_nvd->bridge = NULL; @@ -366,25 +369,49 @@ static int match_cxl_nvdimm(struct device *dev, void *data) static void unregister_nvdimm_region(void *nd_region) { - struct cxl_nvdimm_bridge *cxl_nvb; - struct cxl_pmem_region *cxlr_pmem; + nvdimm_region_delete(nd_region); +} + +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + int rc; + + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, + cxlr_pmem, GFP_KERNEL); + if (rc) + return rc; + + get_device(&cxlr_pmem->dev); + return 0; +} + +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + /* + * It is possible this is called without a corresponding + * cxl_nvdimm_add_region for @cxlr_pmem + */ + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); + if (cxlr_pmem) + put_device(&cxlr_pmem->dev); +} + +static void release_mappings(void *data) +{ int i; + struct cxl_pmem_region *cxlr_pmem = data; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; - cxlr_pmem = nd_region_provider_data(nd_region); - cxl_nvb = cxlr_pmem->bridge; device_lock(&cxl_nvb->dev); for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; - if (cxl_nvd->region) { - put_device(&cxlr_pmem->dev); - cxl_nvd->region = NULL; - } + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); } device_unlock(&cxl_nvb->dev); - - nvdimm_region_delete(nd_region); } static void cxlr_pmem_remove_resource(void *res) @@ -422,7 +449,7 @@ static int cxl_pmem_region_probe(struct device *dev) if (!cxl_nvb->nvdimm_bus) { dev_dbg(dev, "nvdimm bus not found\n"); rc = -ENXIO; - goto err; + goto out_nvb; } memset(&mappings, 0, sizeof(mappings)); @@ -431,7 +458,7 @@ static int cxl_pmem_region_probe(struct device *dev) res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) { rc = -ENOMEM; - goto err; + goto out_nvb; } res->name = "Persistent Memory"; @@ -442,11 +469,11 @@ static int cxl_pmem_region_probe(struct device *dev) rc = insert_resource(&iomem_resource, res); if (rc) - goto err; + goto out_nvb; rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); if (rc) - goto err; + goto out_nvb; ndr_desc.res = res; ndr_desc.provider_data = cxlr_pmem; @@ -462,7 +489,7 @@ static int cxl_pmem_region_probe(struct device *dev) nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) { rc = -ENOMEM; - goto err; + goto out_nvb; } ndr_desc.memregion = cxlr->id; @@ -472,9 +499,13 @@ static int cxl_pmem_region_probe(struct device *dev) info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; - goto err; + goto out_nvb; } + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); + if (rc) + goto out_nvd; + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_memdev *cxlmd = m->cxlmd; @@ -486,7 +517,7 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } /* safe to drop ref now with bridge lock held */ @@ -498,10 +529,17 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } - cxl_nvd->region = cxlr_pmem; - get_device(&cxlr_pmem->dev); + + /* + * Pin the region per nvdimm device as those may be released + * out-of-order with respect to the region, and a single nvdimm + * maybe associated with multiple regions + */ + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); + if (rc) + goto out_nvd; m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -527,27 +565,18 @@ static int cxl_pmem_region_probe(struct device *dev) nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); if (!cxlr_pmem->nd_region) { rc = -ENOMEM; - goto err; + goto out_nvd; } rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, cxlr_pmem->nd_region); -out: +out_nvd: kfree(info); +out_nvb: device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; - -err: - dev_dbg(dev, "failed to create nvdimm region\n"); - for (i--; i >= 0; i--) { - nvdimm = mappings[i].nvdimm; - cxl_nvd = nvdimm_provider_data(nvdimm); - put_device(&cxl_nvd->region->dev); - cxl_nvd->region = NULL; - } - goto out; } static struct cxl_driver cxl_pmem_region_driver = { From 86e86c3cb63325c12ea99fbce2cc5bafba86bb40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:42 -0700 Subject: [PATCH 224/239] tools/testing/cxl: Fix some error exits Fix a few typos where 'goto err_port' was used rather than the object specific cleanup. Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184255.947915.16163477849330181425.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index a072b2d3e726a..133e4c73d3702 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -695,7 +695,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_uport", i); if (!pdev) - goto err_port; + goto err_uport; pdev->dev.parent = &root_port->dev; rc = platform_device_add(pdev); @@ -713,7 +713,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_dport", i); if (!pdev) - goto err_port; + goto err_dport; pdev->dev.parent = &uport->dev; rc = platform_device_add(pdev); From e41c8452b9b204689e68756a3836d1d37b617ad5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:48 -0700 Subject: [PATCH 225/239] tools/testing/cxl: Add a single-port host-bridge regression config Jonathan reports that region creation fails when a single-port host-bridge connects to a multi-port switch. Mock up that configuration so a fix can be tested and regression tested going forward. Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184838.947915.2167957540894293891.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 297 ++++++++++++++++++++++++++++++++--- 1 file changed, 278 insertions(+), 19 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 133e4c73d3702..7edce12fd2ce5 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -12,30 +12,62 @@ #include "mock.h" #define NR_CXL_HOST_BRIDGES 2 +#define NR_CXL_SINGLE_HOST 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; -static struct platform_device - *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * - NR_CXL_SWITCH_PORTS]; -struct platform_device - *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; +#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS) +static struct platform_device *cxl_root_port[NR_MULTI_ROOT]; +static struct platform_device *cxl_switch_uport[NR_MULTI_ROOT]; +#define NR_MEM_MULTI \ + (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_switch_dport[NR_MEM_MULTI]; + +static struct platform_device *cxl_hb_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_root_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_swu_single[NR_CXL_SINGLE_HOST]; +#define NR_MEM_SINGLE (NR_CXL_SINGLE_HOST * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; + +struct platform_device *cxl_mem[NR_MEM_MULTI]; +struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; + + +static inline bool is_multi_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) + if (&cxl_host_bridge[i]->dev == dev) + return true; + return false; +} + +static inline bool is_single_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (&cxl_hb_single[i]->dev == dev) + return true; + return false; +} static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { +static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { [0] = { .handle = &host_bridge[0], }, [1] = { .handle = &host_bridge[1], }, + [2] = { + .handle = &host_bridge[2], + }, + }; static bool is_mock_dev(struct device *dev) @@ -45,6 +77,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) if (dev == &cxl_mem[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) + if (dev == &cxl_mem_single[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -66,7 +101,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES]; + struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -83,6 +118,10 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[2]; } cfmws3; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws4; } __packed mock_cedt = { .cedt = { .header = { @@ -107,6 +146,14 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[2] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 2, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, .cfmws0 = { .cfmws = { .header = { @@ -167,13 +214,29 @@ static struct { }, .target = { 0, 1, }, }, + .cfmws4 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws4), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 4, + .window_size = SZ_256M * 4UL, + }, + .target = { 2 }, + }, }; -struct acpi_cedt_cfmws *mock_cfmws[4] = { +struct acpi_cedt_cfmws *mock_cfmws[] = { [0] = &mock_cedt.cfmws0.cfmws, [1] = &mock_cedt.cfmws1.cfmws, [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, + [4] = &mock_cedt.cfmws4.cfmws, }; struct cxl_mock_res { @@ -304,6 +367,9 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (dev == &cxl_hb_single[i]->dev) + return true; return false; } @@ -326,6 +392,18 @@ static bool is_mock_port(struct device *dev) if (dev == &cxl_switch_dport[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) + if (dev == &cxl_root_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) + if (dev == &cxl_swu_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) + if (dev == &cxl_swd_single[i]->dev) + return true; + if (is_cxl_memdev(dev)) return is_mock_dev(dev->parent); @@ -561,11 +639,31 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) int i, array_size; if (port->depth == 1) { - array_size = ARRAY_SIZE(cxl_root_port); - array = cxl_root_port; + if (is_multi_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (is_single_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_single); + array = cxl_root_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else if (port->depth == 2) { - array_size = ARRAY_SIZE(cxl_switch_dport); - array = cxl_switch_dport; + struct cxl_port *parent = to_cxl_port(port->dev.parent); + + if (is_multi_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else if (is_single_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_swd_single); + array = cxl_swd_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else { dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", port->depth); @@ -576,8 +674,12 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) struct platform_device *pdev = array[i]; struct cxl_dport *dport; - if (pdev->dev.parent != port->uport) + if (pdev->dev.parent != port->uport) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport), + dev_name(pdev->dev.parent)); continue; + } dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); @@ -627,6 +729,157 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_single_init(void) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) { + struct acpi_device *adev = + &host_bridge[NR_CXL_HOST_BRIDGES + i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", + NR_CXL_HOST_BRIDGES + i); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_hb_single[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) { + struct platform_device *bridge = + cxl_hb_single[i % ARRAY_SIZE(cxl_hb_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_root_port", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_port; + pdev->dev.parent = &bridge->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_port; + } + cxl_root_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) { + struct platform_device *root_port = cxl_root_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_uport; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_swu_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) { + struct platform_device *uport = + cxl_swu_single[i % ARRAY_SIZE(cxl_swu_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", + i + NR_MEM_MULTI); + if (!pdev) + goto err_dport; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_swd_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { + struct platform_device *dport = cxl_swd_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem_single[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); +err_port: + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } + + return rc; +} + +static void cxl_single_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } +} + static __init int cxl_test_init(void) { int rc, i; @@ -724,7 +977,6 @@ static __init int cxl_test_init(void) cxl_switch_dport[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; @@ -743,9 +995,13 @@ static __init int cxl_test_init(void) cxl_mem[i] = pdev; } + rc = cxl_single_init(); + if (rc) + goto err_mem; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_mem; + goto err_single; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -758,6 +1014,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_single: + cxl_single_exit(); err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -793,6 +1051,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) From e4f6dfa9ef756a3934a4caf618b1e86e9e8e21d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:54 -0700 Subject: [PATCH 226/239] cxl/region: Fix 'distance' calculation with passthrough ports When programming port decode targets, the algorithm wants to ensure that two devices are compatible to be programmed as peers beneath a given port. A compatible peer is a target that shares the same dport, and where that target's interleave position also routes it to the same dport. Compatibility is determined by the device's interleave position being >= to distance. For example, if a given dport can only map every Nth position then positions less than N away from the last target programmed are incompatible. The @distance for the host-bridge's cxl_port in a simple dual-ported host-bridge configuration with 2 direct-attached devices is 1, i.e. An x2 region divided by 2 dports to reach 2 region targets. An x4 region under an x2 host-bridge would need 2 intervening switches where the @distance at the host bridge level is 2 (x4 region divided by 2 switches to reach 4 devices). However, the distance between peers underneath a single ported host-bridge is always zero because there is no limit to the number of devices that can be mapped. In other words, there are no decoders to program in a passthrough, all descendants are mapped and distance only starts matters for the intervening descendant ports of the passthrough port. Add tracking for the number of dports mapped to a port, and use that to detect the passthrough case for calculating @distance. Cc: Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752185440.947915.6617495912508299445.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 11 +++++++++-- drivers/cxl/core/region.c | 9 ++++++++- drivers/cxl/cxl.h | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bffde862de0bf..e7556864ea808 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -811,6 +811,7 @@ static struct cxl_dport *find_dport(struct cxl_port *port, int id) static int add_dport(struct cxl_port *port, struct cxl_dport *new) { struct cxl_dport *dup; + int rc; device_lock_assert(&port->dev); dup = find_dport(port, new->port_id); @@ -821,8 +822,14 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) dev_name(dup->dport)); return -EBUSY; } - return xa_insert(&port->dports, (unsigned long)new->dport, new, - GFP_KERNEL); + + rc = xa_insert(&port->dports, (unsigned long)new->dport, new, + GFP_KERNEL); + if (rc) + return rc; + + port->nr_dports++; + return 0; } /* diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c52465e09f268..c0253de749458 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -990,7 +990,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (cxl_rr->nr_targets_set) { int i, distance; - distance = p->nr_targets / cxl_rr->nr_targets; + /* + * Passthrough ports impose no distance requirements between + * peers + */ + if (port->nr_dports == 1) + distance = 0; + else + distance = p->nr_targets / cxl_rr->nr_targets; for (i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1164ad49f3d3a..ac75554b5d763 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -457,6 +457,7 @@ struct cxl_pmem_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids + * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering * @component_reg_phys: component register capability base address (optional) @@ -475,6 +476,7 @@ struct cxl_port { struct xarray regions; struct cxl_dport *parent_dport; struct ida decoder_ida; + int nr_dports; int hdm_end; int commit_end; resource_size_t component_reg_phys; From 8f401ec1c8975eabfe4c089de91cbe058deabf71 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:31:00 -0700 Subject: [PATCH 227/239] cxl/region: Recycle region ids At region creation time the next region-id is atomically cached so that there is predictability of region device names. If that region is destroyed and then a new one is created the region id increments. That ends up looking like a memory leak, or is otherwise surprising that identifiers roll forward even after destroying all previously created regions. Try to reuse rather than free old region ids at region release time. While this fixes a cosmetic issue, the needlessly advancing memory region-id gives the appearance of a memory leak, hence the "Fixes" tag, but no "Cc: stable" tag. Cc: Ben Widawsky Cc: Jonathan Cameron Fixes: 779dd20cfb56 ("cxl/region: Add region creation support") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752186062.947915.13200195701224993317.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c0253de749458..f9ae5ad284ffb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1534,9 +1534,24 @@ static const struct attribute_group *region_groups[] = { static void cxl_region_release(struct device *dev) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); struct cxl_region *cxlr = to_cxl_region(dev); + int id = atomic_read(&cxlrd->region_id); + + /* + * Try to reuse the recently idled id rather than the cached + * next id to prevent the region id space from increasing + * unnecessarily. + */ + if (cxlr->id < id) + if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) { + memregion_free(id); + goto out; + } memregion_free(cxlr->id); +out: + put_device(dev->parent); kfree(cxlr); } @@ -1598,6 +1613,11 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_region_key); dev->parent = &cxlrd->cxlsd.cxld.dev; + /* + * Keep root decoder pinned through cxl_region_release to fixup + * region id allocations + */ + get_device(dev->parent); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_region_type; From c4f683731db330460d909bd0ca8d5af876fcdc97 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 31 Oct 2022 11:40:18 +0000 Subject: [PATCH 228/239] hwmon: (scmi) Register explicitly with Thermal Framework Available sensors are enumerated and reported by the SCMI platform server using a 16bit identification number; not all such sensors are of a type supported by hwmon subsystem and, among the supported ones, only a subset could be temperature sensors that have to be registered with the Thermal Framework. Potential clashes between hwmon channels indexes and the underlying real sensors IDs do not play well with the hwmon<-->thermal bridge automatic registration routines and could need a sensible number of fake dummy sensors to be made up in order to keep indexes and IDs in sync. Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ attribute and instead explicit register temperature sensors directly with the Thermal Framework. Cc: Daniel Lezcano Cc: Guenter Roeck Cc: linux-hwmon@vger.kernel.org Signed-off-by: Cristian Marussi Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/20221031114018.59048-1-cristian.marussi@arm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/scmi-hwmon.c | 116 ++++++++++++++++++++++++++++++++----- 1 file changed, 103 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c index b1329a58ce403..e192f0c671465 100644 --- a/drivers/hwmon/scmi-hwmon.c +++ b/drivers/hwmon/scmi-hwmon.c @@ -20,6 +20,11 @@ struct scmi_sensors { const struct scmi_sensor_info **info[hwmon_max]; }; +struct scmi_thermal_sensor { + const struct scmi_protocol_handle *ph; + const struct scmi_sensor_info *info; +}; + static inline u64 __pow10(u8 x) { u64 r = 1; @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value) return 0; } -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, long *val) +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph, + const struct scmi_sensor_info *sensor, + long *val) { int ret; u64 value; - const struct scmi_sensor_info *sensor; - struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev); - sensor = *(scmi_sensors->info[type] + channel); - ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value); + ret = sensor_ops->reading_get(ph, sensor->id, &value); if (ret) return ret; @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, return ret; } +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + const struct scmi_sensor_info *sensor; + struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev); + + sensor = *(scmi_sensors->info[type] + channel); + + return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val); +} + static int scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = { .info = NULL, }; +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz, + int *temp) +{ + int ret; + long value; + struct scmi_thermal_sensor *th_sensor = tz->devdata; + + ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info, + &value); + if (!ret) + *temp = value; + + return ret; +} + +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = { + .get_temp = scmi_hwmon_thermal_get_temp, +}; + static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan, struct device *dev, int num, enum hwmon_sensor_types type, u32 config) @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = { }; static u32 hwmon_attributes[hwmon_max] = { - [hwmon_chip] = HWMON_C_REGISTER_TZ, [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL, [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL, [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL, @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = { [hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL, }; +static int scmi_thermal_sensor_register(struct device *dev, + const struct scmi_protocol_handle *ph, + const struct scmi_sensor_info *sensor) +{ + struct scmi_thermal_sensor *th_sensor; + struct thermal_zone_device *tzd; + + th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL); + if (!th_sensor) + return -ENOMEM; + + th_sensor->ph = ph; + th_sensor->info = sensor; + + /* + * Try to register a temperature sensor with the Thermal Framework: + * skip sensors not defined as part of any thermal zone (-ENODEV) but + * report any other errors related to misconfigured zones/sensors. + */ + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor, + &scmi_hwmon_thermal_ops); + if (IS_ERR(tzd)) { + devm_kfree(dev, th_sensor); + + if (PTR_ERR(tzd) != -ENODEV) + return PTR_ERR(tzd); + + dev_dbg(dev, "Sensor '%s' not attached to any thermal zone.\n", + sensor->name); + } else { + dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n", + sensor->name, tzd->id); + } + + return 0; +} + static int scmi_hwmon_probe(struct scmi_device *sdev) { int i, idx; @@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) enum hwmon_sensor_types type; struct scmi_sensors *scmi_sensors; const struct scmi_sensor_info *sensor; - int nr_count[hwmon_max] = {0}, nr_types = 0; + int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0; const struct hwmon_chip_info *chip_info; struct device *hwdev, *dev = &sdev->dev; struct hwmon_channel_info *scmi_hwmon_chan; @@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) } } - if (nr_count[hwmon_temp]) { - nr_count[hwmon_chip]++; - nr_types++; - } + if (nr_count[hwmon_temp]) + nr_count_temp = nr_count[hwmon_temp]; scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan), GFP_KERNEL); @@ -262,8 +329,31 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors", scmi_sensors, chip_info, NULL); + if (IS_ERR(hwdev)) + return PTR_ERR(hwdev); - return PTR_ERR_OR_ZERO(hwdev); + for (i = 0; i < nr_count_temp; i++) { + int ret; + + sensor = *(scmi_sensors->info[hwmon_temp] + i); + if (!sensor) + continue; + + /* + * Warn on any misconfiguration related to thermal zones but + * bail out of probing only on memory errors. + */ + ret = scmi_thermal_sensor_register(dev, ph, sensor); + if (ret) { + if (ret == -ENOMEM) + return ret; + dev_warn(dev, + "Thermal zone misconfigured for %s. err=%d\n", + sensor->name, ret); + } + } + + return 0; } static const struct scmi_device_id scmi_id_table[] = { From 1e699e177e339e462cdc8571e3d0fcf29665608e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 Nov 2022 16:37:30 -0700 Subject: [PATCH 229/239] Revert "hwmon: (pmbus) Add regulator supply into macro" This reverts commit 54cc3dbfc10dc3db7cb1cf49aee4477a8398fbde. Zev Weiss reports that the reverted patch may cause a regulator undercount. Here is his report: ... having regulator-dummy set as a supply on my PMBus regulators (instead of having them as their own top-level regulators without an upstream supply) leads to enable-count underflow errors when disabling them: # echo 0 > /sys/bus/platform/devices/efuse01/state [ 906.094477] regulator-dummy: Underflow of regulator enable count [ 906.100563] Failed to disable vout: -EINVAL [ 136.992676] reg-userspace-consumer efuse01: Failed to configure state: -22 Zev reports that reverting the patch fixes the problem. So let's do that for now. Fixes: 54cc3dbfc10d ("hwmon: (pmbus) Add regulator supply into macro") Cc: Marcello Sylvester Bauer Reported-by: Zev Weiss Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index 7daaf0caf4d30..10fb17879f8ed 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -467,7 +467,6 @@ extern const struct regulator_ops pmbus_regulator_ops; #define PMBUS_REGULATOR_STEP(_name, _id, _voltages, _step) \ [_id] = { \ .name = (_name # _id), \ - .supply_name = "vin", \ .id = (_id), \ .of_match = of_match_ptr(_name # _id), \ .regulators_node = of_match_ptr("regulators"), \ From 8abcaeaed38109e5ccaf40218e0e9e387f07bfe6 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 28 Oct 2022 09:52:26 +0000 Subject: [PATCH 230/239] cifs: always iterate smb sessions using primary channel smb sessions and tcons currently hang off primary channel only. Secondary channels have the lists as empty. Whenever there's a need to iterate sessions or tcons, we should use the list in the corresponding primary channel. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/misc.c | 6 +++++- fs/cifs/smb2misc.c | 12 ++++++++++-- fs/cifs/smb2ops.c | 6 +++++- fs/cifs/smb2transport.c | 6 +++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index da51ffd029280..3e68d8208cf5e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -400,6 +400,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) { struct smb_hdr *buf = (struct smb_hdr *)buffer; struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *pCifsInode; @@ -464,9 +465,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(srv) ? srv->primary_server : srv; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &srv->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index a387204779660..e73a3c649b877 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -135,6 +135,7 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, int smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) { + struct TCP_Server_Info *pserver; struct smb2_hdr *shdr = (struct smb2_hdr *)buf; struct smb2_pdu *pdu = (struct smb2_pdu *)shdr; int hdr_size = sizeof(struct smb2_hdr); @@ -143,6 +144,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) __u32 calc_len; /* calculated length */ __u64 mid; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* * Add function to do table lookup of StructureSize by command * ie Validate the wct via smb2_struct_sizes table above @@ -155,7 +159,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(iter, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(iter, &pserver->smb_ses_list, smb_ses_list) { if (iter->Suid == le64_to_cpu(thdr->SessionId)) { ses = iter; break; @@ -671,6 +675,7 @@ bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) { struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode; @@ -691,9 +696,12 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4f53fa012936e..cca12eadbb073 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2302,14 +2302,18 @@ static void smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) { struct smb2_hdr *shdr = (struct smb2_hdr *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; if (shdr->Status != STATUS_NETWORK_NAME_DELETED) return; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8e3f26e6f6b9b..8a9d9d08cf19e 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -136,9 +136,13 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) static struct cifs_ses * smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; ++ses->ses_count; From 23d9b9b757e8007204d8f71448ab55d5ef2ae8e5 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 28 Oct 2022 10:01:45 +0000 Subject: [PATCH 231/239] cifs: avoid unnecessary iteration of tcp sessions In a few places, we do unnecessary iterations of tcp sessions, even when the server struct is provided. The change avoids it and uses the server struct provided. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 69 +++++++++++++++++++++-------------------- fs/cifs/smb2ops.c | 24 +++++++------- fs/cifs/smb2transport.c | 13 ++++---- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e73a3c649b877..572293c18e16f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -612,51 +612,52 @@ smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon, } static bool -smb2_is_valid_lease_break(char *buffer) +smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) { struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct TCP_Server_Info *server; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_pending_open *open; cifs_dbg(FYI, "Checking for lease break\n"); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - spin_lock(&tcon->open_file_lock); - cifs_stats_inc( - &tcon->stats.cifs_stats.num_oplock_brks); - if (smb2_tcon_has_lease(tcon, rsp)) { - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } - open = smb2_tcon_find_pending_open_lease(tcon, - rsp); - if (open) { - __u8 lease_key[SMB2_LEASE_KEY_SIZE]; - struct tcon_link *tlink; - - tlink = cifs_get_tlink(open->tlink); - memcpy(lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - smb2_queue_pending_open_break(tlink, - lease_key, - rsp->NewLeaseState); - return true; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp)) { spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + open = smb2_tcon_find_pending_open_lease(tcon, + rsp); + if (open) { + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; + struct tcon_link *tlink; + + tlink = cifs_get_tlink(open->tlink); + memcpy(lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + smb2_queue_pending_open_break(tlink, + lease_key, + rsp->NewLeaseState); + return true; + } + spin_unlock(&tcon->open_file_lock); - if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { + spin_unlock(&cifs_tcp_ses_lock); + return true; } } } @@ -689,7 +690,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer); + return smb2_is_valid_lease_break(buffer, server); else return false; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index cca12eadbb073..880cd494afea4 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4268,21 +4268,23 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) static int smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; u8 *ses_enc_key; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) { - spin_lock(&ses->ses_lock); - ses_enc_key = enc ? ses->smb3encryptionkey : - ses->smb3decryptionkey; - memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); - spin_unlock(&ses->ses_lock); - spin_unlock(&cifs_tcp_ses_lock); - return 0; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) { + spin_lock(&ses->ses_lock); + ses_enc_key = enc ? ses->smb3encryptionkey : + ses->smb3decryptionkey; + memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + return 0; } } spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8a9d9d08cf19e..381babc1212c9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -77,18 +77,19 @@ static int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; + struct TCP_Server_Info *pserver; struct cifs_ses *ses = NULL; - struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) - goto found; - } + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) + goto found; } cifs_server_dbg(VFS, "%s: Could not find session 0x%llx\n", __func__, ses_id); From 542228db2f28fdf775b301f2843e1fe486e7c797 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Fri, 4 Nov 2022 15:44:41 +0800 Subject: [PATCH 232/239] cifs: fix use-after-free on the link name xfstests generic/011 reported use-after-free bug as follows: BUG: KASAN: use-after-free in __d_alloc+0x269/0x859 Read of size 15 at addr ffff8880078933a0 by task dirstress/952 CPU: 1 PID: 952 Comm: dirstress Not tainted 6.1.0-rc3+ #77 Call Trace: __dump_stack+0x23/0x29 dump_stack_lvl+0x51/0x73 print_address_description+0x67/0x27f print_report+0x3e/0x5c kasan_report+0x7b/0xa8 kasan_check_range+0x1b2/0x1c1 memcpy+0x22/0x5d __d_alloc+0x269/0x859 d_alloc+0x45/0x20c d_alloc_parallel+0xb2/0x8b2 lookup_open+0x3b8/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd Allocated by task 952: kasan_save_stack+0x1f/0x42 kasan_set_track+0x21/0x2a kasan_save_alloc_info+0x17/0x1d __kasan_kmalloc+0x7e/0x87 __kmalloc_node_track_caller+0x59/0x155 kstrndup+0x60/0xe6 parse_mf_symlink+0x215/0x30b check_mf_symlink+0x260/0x36a cifs_get_inode_info+0x14e1/0x1690 cifs_revalidate_dentry_attr+0x70d/0x964 cifs_revalidate_dentry+0x36/0x62 cifs_d_revalidate+0x162/0x446 lookup_open+0x36f/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 950: kasan_save_stack+0x1f/0x42 kasan_set_track+0x21/0x2a kasan_save_free_info+0x1c/0x34 ____kasan_slab_free+0x1c1/0x1d5 __kasan_slab_free+0xe/0x13 __kmem_cache_free+0x29a/0x387 kfree+0xd3/0x10e cifs_fattr_to_inode+0xb6a/0xc8c cifs_get_inode_info+0x3cb/0x1690 cifs_revalidate_dentry_attr+0x70d/0x964 cifs_revalidate_dentry+0x36/0x62 cifs_d_revalidate+0x162/0x446 lookup_open+0x36f/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd When opened a symlink, link name is from 'inode->i_link', but it may be reset to a new value when revalidate the dentry. If some processes get the link name on the race scenario, then UAF will happen on link name. Fix this by implementing 'get_link' interface to duplicate the link name. Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: ChenXiaoSong Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 26 +++++++++++++++++++++++++- fs/cifs/inode.c | 5 ----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d0b9fec111aac..fe220686bba4f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1143,8 +1143,32 @@ const struct inode_operations cifs_file_inode_ops = { .fiemap = cifs_fiemap, }; +const char *cifs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + char *target_path; + + target_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!target_path) + return ERR_PTR(-ENOMEM); + + spin_lock(&inode->i_lock); + if (likely(CIFS_I(inode)->symlink_target)) { + strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX); + } else { + kfree(target_path); + target_path = ERR_PTR(-EOPNOTSUPP); + } + spin_unlock(&inode->i_lock); + + if (!IS_ERR(target_path)) + set_delayed_call(done, kfree_link, target_path); + + return target_path; +} + const struct inode_operations cifs_symlink_inode_ops = { - .get_link = simple_get_link, + .get_link = cifs_get_link, .permission = cifs_permission, .listxattr = cifs_listxattr, }; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9bde08d44617f..4e2ca3c6e5c00 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -215,11 +215,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) kfree(cifs_i->symlink_target); cifs_i->symlink_target = fattr->cf_symlink_target; fattr->cf_symlink_target = NULL; - - if (unlikely(!cifs_i->symlink_target)) - inode->i_link = ERR_PTR(-EOPNOTSUPP); - else - inode->i_link = cifs_i->symlink_target; } spin_unlock(&inode->i_lock); From 17a0bc9bd697f75cfdf9b378d5eb2d7409c91340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Wed, 12 Oct 2022 14:13:30 +0100 Subject: [PATCH 233/239] ext4: fix BUG_ON() when directory entry has invalid rec_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rec_len field in the directory entry has to be a multiple of 4. A corrupted filesystem image can be used to hit a BUG() in ext4_rec_len_to_disk(), called from make_indexed_dir(). ------------[ cut here ]------------ kernel BUG at fs/ext4/ext4.h:2413! ... RIP: 0010:make_indexed_dir+0x53f/0x5f0 ... Call Trace: ? add_dirent_to_buf+0x1b2/0x200 ext4_add_entry+0x36e/0x480 ext4_add_nondir+0x2b/0xc0 ext4_create+0x163/0x200 path_openat+0x635/0xe90 do_filp_open+0xb4/0x160 ? __create_object.isra.0+0x1de/0x3b0 ? _raw_spin_unlock+0x12/0x30 do_sys_openat2+0x91/0x150 __x64_sys_open+0x6c/0xa0 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The fix simply adds a call to ext4_check_dir_entry() to validate the directory entry, returning -EFSCORRUPTED if the entry is invalid. CC: stable@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=216540 Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20221012131330.32456-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4183a4cb4a21e..be8136aafa22c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2259,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, memset(de, 0, len); /* wipe old data */ de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { + if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, + (data2 + (blocksize - csum_size) - + (char *) de))) { + brelse(bh2); + brelse(bh); + return -EFSCORRUPTED; + } de = de2; + } de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de, blocksize); From 1b8f787ef547230a3249bcf897221ef0cc78481b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 18 Oct 2022 10:27:01 +0800 Subject: [PATCH 234/239] ext4: fix warning in 'ext4_da_release_space' Syzkaller report issue as follows: EXT4-fs (loop0): Free/Dirty block details EXT4-fs (loop0): free_blocks=0 EXT4-fs (loop0): dirty_blocks=0 EXT4-fs (loop0): Block reservation details EXT4-fs (loop0): i_reserved_data_blocks=0 EXT4-fs warning (device loop0): ext4_da_release_space:1527: ext4_da_release_space: ino 18, to_free 1 with only 0 reserved data blocks ------------[ cut here ]------------ WARNING: CPU: 0 PID: 92 at fs/ext4/inode.c:1528 ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1524 Modules linked in: CPU: 0 PID: 92 Comm: kworker/u4:4 Not tainted 6.0.0-syzkaller-09423-g493ffd6605b2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: writeback wb_workfn (flush-7:0) RIP: 0010:ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1528 RSP: 0018:ffffc900015f6c90 EFLAGS: 00010296 RAX: 42215896cd52ea00 RBX: 0000000000000000 RCX: 42215896cd52ea00 RDX: 0000000000000000 RSI: 0000000080000001 RDI: 0000000000000000 RBP: 1ffff1100e907d96 R08: ffffffff816aa79d R09: fffff520002bece5 R10: fffff520002bece5 R11: 1ffff920002bece4 R12: ffff888021fd2000 R13: ffff88807483ecb0 R14: 0000000000000001 R15: ffff88807483e740 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005555569ba628 CR3: 000000000c88e000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ext4_es_remove_extent+0x1ab/0x260 fs/ext4/extents_status.c:1461 mpage_release_unused_pages+0x24d/0xef0 fs/ext4/inode.c:1589 ext4_writepages+0x12eb/0x3be0 fs/ext4/inode.c:2852 do_writepages+0x3c3/0x680 mm/page-writeback.c:2469 __writeback_single_inode+0xd1/0x670 fs/fs-writeback.c:1587 writeback_sb_inodes+0xb3b/0x18f0 fs/fs-writeback.c:1870 wb_writeback+0x41f/0x7b0 fs/fs-writeback.c:2044 wb_do_writeback fs/fs-writeback.c:2187 [inline] wb_workfn+0x3cb/0xef0 fs/fs-writeback.c:2227 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 Above issue may happens as follows: ext4_da_write_begin ext4_create_inline_data ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); __ext4_ioctl ext4_ext_migrate -> will lead to eh->eh_entries not zero, and set extent flag ext4_da_write_begin ext4_da_convert_inline_data_to_extent ext4_da_write_inline_data_begin ext4_da_map_blocks ext4_insert_delayed_block if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) if (!ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk)) ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk)); -> will return 1 allocated = true; ext4_es_insert_delayed_block(inode, lblk, allocated); ext4_writepages mpage_map_and_submit_extent(handle, &mpd, &give_up_on_write); -> return -ENOSPC mpage_release_unused_pages(&mpd, give_up_on_write); -> give_up_on_write == 1 ext4_es_remove_extent ext4_da_release_space(inode, reserved); if (unlikely(to_free > ei->i_reserved_data_blocks)) -> to_free == 1 but ei->i_reserved_data_blocks == 0 -> then trigger warning as above To solve above issue, forbid inode do migrate which has inline data. Cc: stable@kernel.org Reported-by: syzbot+c740bb18df70ad00952e@syzkaller.appspotmail.com Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221018022701.683489-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 0a220ec9862de..a19a9661646eb 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -424,7 +424,8 @@ int ext4_ext_migrate(struct inode *inode) * already is extent-based, error out. */ if (!ext4_has_feature_extents(inode->i_sb) || - (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + ext4_has_inline_data(inode)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) From 9f2a1d9fb33a2129a9ba29bc61d3f14adb28ddc2 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 25 Oct 2022 12:02:06 +0800 Subject: [PATCH 235/239] ext4: fix wrong return err in ext4_load_and_init_journal() The return value is wrong in ext4_load_and_init_journal(). The local variable 'err' need to be initialized before goto out. The original code in __ext4_fill_super() is fine because it has two return values 'ret' and 'err' and 'ret' is initialized as -EINVAL. After we factor out ext4_load_and_init_journal(), this code is broken. So fix it by directly returning -EINVAL in the error handler path. Cc: stable@kernel.org Fixes: 9c1dd22d7422 ("ext4: factor out ext4_load_and_init_journal()") Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221025040206.3134773-1-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d733db8a0b026..6ddebc9f1b90e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4886,7 +4886,7 @@ static int ext4_load_and_init_journal(struct super_block *sb, flush_work(&sbi->s_error_work); jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - return err; + return -EINVAL; } static int ext4_journal_data_mode_check(struct super_block *sb) From 0d043351e5baf3857f915367deba2a518b6a0809 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 5 Nov 2022 23:42:36 -0400 Subject: [PATCH 236/239] ext4: fix fortify warning in fs/ext4/fast_commit.c:1551 With the new fortify string system, rework the memcpy to avoid this warning: memcpy: detected field-spanning write (size 60) of single field "&raw_inode->i_generation" at fs/ext4/fast_commit.c:1551 (size 4) Cc: stable@kernel.org Fixes: 54d9469bc515 ("fortify: Add run-time WARN for cross-field memcpy()") Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ef05bfa87798c..0f6d0a80467d7 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1521,6 +1521,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, struct ext4_iloc iloc; int inode_len, ino, ret, tag = tl->fc_tag; struct ext4_extent_header *eh; + size_t off_gen = offsetof(struct ext4_inode, i_generation); memcpy(&fc_inode, val, sizeof(fc_inode)); @@ -1548,8 +1549,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, raw_inode = ext4_raw_inode(&iloc); memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); - memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, - inode_len - offsetof(struct ext4_inode, i_generation)); + memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, + inode_len - off_gen); if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); if (eh->eh_magic != EXT4_EXT_MAGIC) { From f0c4d9fc9cc9462659728d168387191387e903cc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Nov 2022 15:07:11 -0800 Subject: [PATCH 237/239] Linux 6.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 226ca8a312d94..ac2ec990422d9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 6973091d1b50ab4042f6a2d495f59e9db3662ab8 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 11 Oct 2022 18:07:12 +0200 Subject: [PATCH 238/239] KVM: s390: pv: don't allow userspace to set the clock under PV When running under PV, the guest's TOD clock is under control of the ultravisor and the hypervisor isn't allowed to change it. Hence, don't allow userspace to change the guest's TOD clock by returning -EOPNOTSUPP. When userspace changes the guest's TOD clock, KVM updates its kvm.arch.epoch field and, in addition, the epoch field in all state descriptions of all VCPUs. But, under PV, the ultravisor will ignore the epoch field in the state description and simply overwrite it on next SIE exit with the actual guest epoch. This leads to KVM having an incorrect view of the guest's TOD clock: it has updated its internal kvm.arch.epoch field, but the ultravisor ignores the field in the state description. Whenever a guest is now waiting for a clock comparator, KVM will incorrectly calculate the time when the guest should wake up, possibly causing the guest to sleep for much longer than expected. With this change, kvm_s390_set_tod() will now take the kvm->lock to be able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock() also takes kvm->lock, use __kvm_s390_set_tod_clock() instead. The function kvm_s390_set_tod_clock is now unused, hence remove it. Update the documentation to indicate the TOD clock attr calls can now return -EOPNOTSUPP. Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible") Reported-by: Marc Hartmayer Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- Documentation/virt/kvm/devices/vm.rst | 3 +++ arch/s390/kvm/kvm-s390.c | 26 +++++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 1 - 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 0aa5b1cfd700c..60acc39e0e937 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT). :Parameters: address of a buffer in user space to store the data (u8) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW ----------------------------------- @@ -224,6 +225,7 @@ the POP (u64). :Parameters: address of a buffer in user space to store the data (u64) to :Returns: -EFAULT if the given address is not accessible from kernel space + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT ----------------------------------- @@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0. (kvm_s390_vm_tod_clock) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 4. GROUP: KVM_S390_VM_CRYPTO ============================ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b073..bc491a73815c3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results. + */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -4377,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e8..4755492dfabc6 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); From b6662e37772715447aeff2538444ff291e02ea31 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Tue, 25 Oct 2022 22:32:33 -0300 Subject: [PATCH 239/239] KVM: s390: pci: Fix allocation size of aift kzdev elements The 'kzdev' field of struct 'zpci_aift' is an array of pointers to 'kvm_zdev' structs. Allocate the proper size accordingly. Reported by Coccinelle: WARNING: Use correct pointer type argument for sizeof Fixes: 98b1d33dac5f ("KVM: s390: pci: do initial setup for AEN interpretation") Signed-off-by: Rafael Mendonca Reviewed-by: Christian Borntraeger Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221026013234.960859-1-rafaelmendsr@gmail.com Message-Id: <20221026013234.960859-1-rafaelmendsr@gmail.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0aec..ded1af2ddae99 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc) return -EPERM; mutex_lock(&aift->aift_lock); - aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev), + aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *), GFP_KERNEL); if (!aift->kzdev) { rc = -ENOMEM;