From 9739ff4887c77a38575c23b12766b0a37c8be13c Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Tue, 16 Jul 2024 17:22:04 +0530 Subject: [PATCH 01/59] KVM: PPC: Book3S HV: Refactor HFSCR emulation for KVM guests Refactor HFSCR emulation for KVM guests when they exit out with H_FAC_UNAVAIL to use a switch case instead of checking all "cause" values, since the "cause" values are mutually exclusive; and this is better expressed with a switch case. Signed-off-by: Gautam Menghani Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240716115206.70210-1-gautam@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8f7d7e37bc8c6..0c06d2a598611 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1922,14 +1922,22 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = EMULATE_FAIL; if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (cause == FSCR_MSGP_LG) + switch (cause) { + case FSCR_MSGP_LG: r = kvmppc_emulate_doorbell_instr(vcpu); - if (cause == FSCR_PM_LG) + break; + case FSCR_PM_LG: r = kvmppc_pmu_unavailable(vcpu); - if (cause == FSCR_EBB_LG) + break; + case FSCR_EBB_LG: r = kvmppc_ebb_unavailable(vcpu); - if (cause == FSCR_TM_LG) + break; + case FSCR_TM_LG: r = kvmppc_tm_unavailable(vcpu); + break; + default: + break; + } } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL | From 8a93960abed960b9f6097b6471b7fb34120ffc6a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:47 -0600 Subject: [PATCH 02/59] powerpc: Use of_property_present() Use of_property_present() to test for property presence rather than of_get_property(). This is part of a larger effort to remove callers of of_get_property() and similar functions. 
of_get_property() leaks the DT property data pointer which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731191312.1710417-9-robh@kernel.org --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/powernv/opal-lpc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index af3a5d37a1496..3d072a7455bf6 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -860,7 +860,7 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) int64_t rc; /* Hot reset to the bus if firmware cannot handle */ - if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL)) + if (!dn || !of_property_present(dn, "ibm,reset-by-firmware")) return __pnv_eeh_bridge_reset(pdev, option); pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n", diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index a16f07cdab267..8a7f39e106bdb 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -393,7 +393,7 @@ void __init opal_lpc_init(void) for_each_compatible_node(np, NULL, "ibm,power8-lpc") { if (!of_device_is_available(np)) continue; - if (!of_get_property(np, "primary", NULL)) + if (!of_property_present(np, "primary")) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); of_node_put(np); From 28455894bb99a1afe541cb7c987cc7e193c41fc6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 31 Jul 2024 15:06:14 +0200 Subject: [PATCH 03/59] powerpc/traps: Use backlight power constants Replace FB_BLANK_ constants with their counterparts from the backlight subsystem. The values are identical, so there's no change in functionality or semantics. 
traps.c already includes backlight.h where the BACKLIGHT constants are defined. Signed-off-by: Thomas Zimmermann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731130720.1148872-2-tzimmermann@suse.de --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 28d6472c380a8..edf5cabe5dfdb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -121,7 +121,7 @@ static void pmac_backlight_unblank(void) props = &pmac_backlight->props; props->brightness = props->max_brightness; - props->power = FB_BLANK_UNBLANK; + props->power = BACKLIGHT_POWER_ON; backlight_update_status(pmac_backlight); } mutex_unlock(&pmac_backlight_mutex); From c7907a47bb68a18758edf12fd45e201ff03e2acc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 31 Jul 2024 15:06:15 +0200 Subject: [PATCH 04/59] macintosh/via-pmu-backlight: Use backlight power constants Replace FB_BLANK_ constants with their counterparts from the backlight subsystem. The values are identical, so there's no change in functionality or semantics. via-pmu-backlight.c already includes backlight.h where the BACKLIGHT constants are defined. 
Signed-off-by: Thomas Zimmermann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731130720.1148872-3-tzimmermann@suse.de --- drivers/macintosh/via-pmu-backlight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c index 89450645c2305..26bd9ed5e6645 100644 --- a/drivers/macintosh/via-pmu-backlight.c +++ b/drivers/macintosh/via-pmu-backlight.c @@ -178,7 +178,7 @@ void __init pmu_backlight_init(void) } bd->props.brightness = level; - bd->props.power = FB_BLANK_UNBLANK; + bd->props.power = BACKLIGHT_POWER_ON; backlight_update_status(bd); printk(KERN_INFO "PMU Backlight initialized (%s)\n", name); From c4afe3eb04a5fc095a9e3b1b25691f9ed31a52d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 4 Aug 2024 13:20:31 +0200 Subject: [PATCH 05/59] powerpc/476: Drop explicit initialization of struct i2c_device_id::driver_data to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver doesn't use the driver_data member of struct i2c_device_id, so don't explicitly initialize this member. This prepares putting driver_data in an anonymous union which requires either no initialization or named designators. But it's also a nice cleanup on its own. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240804112032.3628645-2-u.kleine-koenig@baylibre.com --- arch/powerpc/platforms/44x/ppc476.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 164cbcd4588e4..e7b7bdaad341f 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -95,7 +95,7 @@ static int avr_probe(struct i2c_client *client) } static const struct i2c_device_id avr_id[] = { - { "akebono-avr", 0 }, + { "akebono-avr" }, { } }; From fa740ca82277b476a49fee83c6fdb023656ef779 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 7 Aug 2024 10:56:04 +0800 Subject: [PATCH 06/59] powerpc: Remove useless config comment in asm/percpu.h commit 0db880fc865f ("powerpc: Avoid nmi_enter/nmi_exit in real mode interrupt.") has a config comment typo, and the #if/#else/#endif section is small and doesn't nest additional #ifdefs so the comment is useless and should be removed completely. 
Signed-off-by: Jinjie Ruan Suggested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240807025604.2817577-1-ruanjinjie@huawei.com --- arch/powerpc/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index 634970ce13c6b..ecf5ac70cfae6 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -23,7 +23,7 @@ DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); (static_key_enabled(&__percpu_first_chunk_is_paged.key)) #else #define percpu_first_chunk_is_paged false -#endif /* CONFIG_PPC64 && CONFIG_SMP */ +#endif #include From 46765aaec4d78b9fef59e647ab228283991de075 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 16 Aug 2024 17:33:12 +0800 Subject: [PATCH 07/59] KVM: PPC: Book3S HV: remove unused variable During build testing, we found an error: arch/powerpc/kvm/book3s_hv.c:4052:17: error: variable 'loops' set but not used unsigned long loops = 0; 1 error generated. Fix it by removing the unused variable. 
Fixes: b4deba5c41e9 ("KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8") Signed-off-by: Alex Shi Signed-off-by: Michael Ellerman Link: https://msgid.link/20240816093313.327268-1-alexs@kernel.org --- arch/powerpc/kvm/book3s_hv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0c06d2a598611..ba0492f9de650 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4057,7 +4057,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* Return to whole-core mode if we split the core earlier */ if (cmd_bit) { unsigned long hid0 = mfspr(SPRN_HID0); - unsigned long loops = 0; hid0 &= ~HID0_POWER8_DYNLPARDIS; stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; @@ -4069,7 +4068,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) if (!(hid0 & stat_bit)) break; cpu_relax(); - ++loops; } split_info.do_nap = 0; } From db9a63913fc7aa6ea419c9a787bfa18937515037 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 26 Jul 2024 22:33:21 +1000 Subject: [PATCH 08/59] MAINTAINERS: Mark powerpc Cell as orphaned Arnd is no longer actively maintaining Cell, mark it as orphan. Also drop the dead developerworks link. Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240726123322.1165562-1-mpe@ellerman.id.au --- CREDITS | 3 +++ MAINTAINERS | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CREDITS b/CREDITS index 053e5a5003eb4..65165dc80f040 100644 --- a/CREDITS +++ b/CREDITS @@ -378,6 +378,9 @@ S: 1549 Hiironen Rd. 
S: Brimson, MN 55602 S: USA +N: Arnd Bergmann +D: Maintainer of Cell Broadband Engine Architecture + N: Hennus Bergman P: 1024/77D50909 76 99 FD 31 91 E1 96 1C 90 BB 22 80 62 F6 BD 63 D: Author and maintainer of the QIC-02 tape driver diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e0..9ce5bcf765832 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5096,10 +5096,8 @@ F: Documentation/devicetree/bindings/media/cec/cec-gpio.yaml F: drivers/media/cec/platform/cec-gpio/ CELL BROADBAND ENGINE ARCHITECTURE -M: Arnd Bergmann L: linuxppc-dev@lists.ozlabs.org -S: Supported -W: http://www.ibm.com/developerworks/power/cell/ +S: Orphan F: arch/powerpc/include/asm/cell*.h F: arch/powerpc/include/asm/spu*.h F: arch/powerpc/include/uapi/asm/spu*.h From 81695066c76fa74aa00aadbcb360cc7ab9c70c51 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 26 Jul 2024 22:33:22 +1000 Subject: [PATCH 09/59] MAINTAINERS: Mark powerpc spufs as orphaned Jeremy is no longer actively maintaining spufs, mark it as orphan. Also drop the dead developerworks link. 
Acked-by: Jeremy Kerr Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240726123322.1165562-2-mpe@ellerman.id.au --- CREDITS | 3 +++ MAINTAINERS | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CREDITS b/CREDITS index 65165dc80f040..d439f5a1bc00d 100644 --- a/CREDITS +++ b/CREDITS @@ -1872,6 +1872,9 @@ S: K osmidomkum 723 S: 160 00 Praha 6 S: Czech Republic +N: Jeremy Kerr +D: Maintainer of SPU File System + N: Michael Kerrisk E: mtk.manpages@gmail.com W: https://man7.org/ diff --git a/MAINTAINERS b/MAINTAINERS index 9ce5bcf765832..9ee5195d021c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21493,10 +21493,8 @@ F: include/linux/spmi.h F: include/trace/events/spmi.h SPU FILE SYSTEM -M: Jeremy Kerr L: linuxppc-dev@lists.ozlabs.org -S: Supported -W: http://www.ibm.com/developerworks/power/cell/ +S: Orphan F: Documentation/filesystems/spufs/spufs.rst F: arch/powerpc/platforms/cell/spufs/ From a540ad3e386f8f84bc6d600b93792a50861a81ef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Aug 2024 08:47:51 +0200 Subject: [PATCH 10/59] powerpc: Remove unused LHZX_BE macro LHZX_BE has been unused since commit dbf44daf7c88 ("bpf, ppc64: remove ld_abs/ld_ind") Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/fd332b01c47bb9cb6c3af1696a2e109be655f5b5.1724222856.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-compat.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2bc53c646ccd7..b0b209c1df50b 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -26,13 +26,11 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ -#define LHZX_BE stringify_in_c(lhzx) #define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) #define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else -#define LHZX_BE stringify_in_c(lhbrx) #define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) #define STWX_BE stringify_in_c(stwbrx) From e6b8940e7e80cdfe98ba8493214922998920dd9c Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 15 May 2024 12:44:41 +1000 Subject: [PATCH 11/59] powerpc/code-patching: Add generic memory patching patch_instruction() is designed for patching instructions in otherwise readonly memory. Other consumers also sometimes need to patch readonly memory, so have abused patch_instruction() for arbitrary data patches. This is a problem on ppc64 as patch_instruction() decides on the patch width using the 'instruction' opcode to see if it's a prefixed instruction. Data that triggers this can lead to larger writes, possibly crossing a page boundary and failing the write altogether. Introduce patch_uint(), and patch_ulong(), with aliases patch_u32(), and patch_u64() (on ppc64) designed for aligned data patches. The patch size is now determined by the called function, and is passed as an additional parameter to generic internals. While the instruction flushing is not required for data patches, it remains unconditional in this patch. 
A followup series is possible if benchmarking shows fewer flushes gives an improvement in some data-patching workload. ppc32 does not support prefixed instructions, so is unaffected by the original issue. Care is taken in not exposing the size parameter in the public (non-static) interface, so the compiler can const-propagate it away. Signed-off-by: Benjamin Gray Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://msgid.link/20240515024445.236364-2-bgray@linux.ibm.com --- arch/powerpc/include/asm/code-patching.h | 31 ++++++++++++ arch/powerpc/lib/code-patching.c | 64 ++++++++++++++++++------ 2 files changed, 80 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 0e29ccf903d09..21a36e2c4e262 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -76,6 +76,37 @@ int patch_instruction(u32 *addr, ppc_inst_t instr); int raw_patch_instruction(u32 *addr, ppc_inst_t instr); int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr); +/* + * The data patching functions patch_uint() and patch_ulong(), etc., must be + * called on aligned addresses. + * + * The instruction patching functions patch_instruction() and similar must be + * called on addresses satisfying instruction alignment requirements. 
+ */ + +#ifdef CONFIG_PPC64 + +int patch_uint(void *addr, unsigned int val); +int patch_ulong(void *addr, unsigned long val); + +#define patch_u64 patch_ulong + +#else + +static inline int patch_uint(void *addr, unsigned int val) +{ + return patch_instruction(addr, ppc_inst(val)); +} + +static inline int patch_ulong(void *addr, unsigned long val) +{ + return patch_instruction(addr, ppc_inst(val)); +} + +#endif + +#define patch_u32 patch_uint + static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0d1f3ee911152..7f423fa3c51be 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -20,15 +20,14 @@ #include #include -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) +static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword) { - if (!ppc_inst_prefixed(instr)) { - u32 val = ppc_inst_val(instr); + if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) { + /* For big endian correctness: plain address would use the wrong half */ + u32 val32 = val; - __put_kernel_nofault(patch_addr, &val, u32, failed); + __put_kernel_nofault(patch_addr, &val32, u32, failed); } else { - u64 val = ppc_inst_as_ulong(instr); - __put_kernel_nofault(patch_addr, &val, u64, failed); } @@ -44,7 +43,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { - return __patch_instruction(addr, instr, addr); + if (ppc_inst_prefixed(instr)) + return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true); + else + return __patch_mem(addr, ppc_inst_val(instr), addr, false); } struct patch_context { @@ -276,7 +278,7 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) +static int 
__do_patch_mem_mm(void *addr, unsigned long val, bool is_dword) { int err; u32 *patch_addr; @@ -305,7 +307,7 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) orig_mm = start_using_temp_mm(patching_mm); - err = __patch_instruction(addr, instr, patch_addr); + err = __patch_mem(addr, val, patch_addr, is_dword); /* context synchronisation performed by __patch_instruction (isync or exception) */ stop_using_temp_mm(patching_mm, orig_mm); @@ -322,7 +324,7 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) return err; } -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __do_patch_mem(void *addr, unsigned long val, bool is_dword) { int err; u32 *patch_addr; @@ -339,7 +341,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) if (radix_enabled()) asm volatile("ptesync": : :"memory"); - err = __patch_instruction(addr, instr, patch_addr); + err = __patch_mem(addr, val, patch_addr, is_dword); pte_clear(&init_mm, text_poke_addr, pte); flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); @@ -347,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -int patch_instruction(u32 *addr, ppc_inst_t instr) +static int patch_mem(void *addr, unsigned long val, bool is_dword) { int err; unsigned long flags; @@ -359,19 +361,51 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) */ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || !static_branch_likely(&poking_init_done)) - return raw_patch_instruction(addr, instr); + return __patch_mem(addr, val, addr, is_dword); local_irq_save(flags); if (mm_patch_enabled()) - err = __do_patch_instruction_mm(addr, instr); + err = __do_patch_mem_mm(addr, val, is_dword); else - err = __do_patch_instruction(addr, instr); + err = __do_patch_mem(addr, val, is_dword); local_irq_restore(flags); return err; } + +#ifdef CONFIG_PPC64 + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return 
patch_mem(addr, ppc_inst_as_ulong(instr), true); + else + return patch_mem(addr, ppc_inst_val(instr), false); +} NOKPROBE_SYMBOL(patch_instruction); +int patch_uint(void *addr, unsigned int val) +{ + return patch_mem(addr, val, false); +} +NOKPROBE_SYMBOL(patch_uint); + +int patch_ulong(void *addr, unsigned long val) +{ + return patch_mem(addr, val, true); +} +NOKPROBE_SYMBOL(patch_ulong); + +#else + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + return patch_mem(addr, ppc_inst_val(instr), false); +} +NOKPROBE_SYMBOL(patch_instruction) + +#endif + static int patch_memset64(u64 *addr, u64 val, size_t count) { for (u64 *end = addr + count; addr < end; addr++) From dbf828aab466c6534711d1f1454c409ea68d18d0 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 15 May 2024 12:44:42 +1000 Subject: [PATCH 12/59] powerpc/code-patching: Add data patch alignment check The new data patching still needs to be aligned within a cacheline too for the flushes to work correctly. To simplify this requirement, we just say data patches must be aligned. Detect when data patching is not aligned, returning an invalid argument error. 
Signed-off-by: Benjamin Gray Reviewed-by: Hari Bathini Acked-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240515024445.236364-3-bgray@linux.ibm.com --- arch/powerpc/include/asm/code-patching.h | 6 ++++++ arch/powerpc/lib/code-patching.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 21a36e2c4e262..e7f14720f6307 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -95,11 +95,17 @@ int patch_ulong(void *addr, unsigned long val); static inline int patch_uint(void *addr, unsigned int val) { + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int))) + return -EINVAL; + return patch_instruction(addr, ppc_inst(val)); } static inline int patch_ulong(void *addr, unsigned long val) { + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long))) + return -EINVAL; + return patch_instruction(addr, ppc_inst(val)); } diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 7f423fa3c51be..acdab294b340a 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -386,12 +386,18 @@ NOKPROBE_SYMBOL(patch_instruction); int patch_uint(void *addr, unsigned int val) { + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int))) + return -EINVAL; + return patch_mem(addr, val, false); } NOKPROBE_SYMBOL(patch_uint); int patch_ulong(void *addr, unsigned long val) { + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long))) + return -EINVAL; + return patch_mem(addr, val, true); } NOKPROBE_SYMBOL(patch_ulong); From 90d4fed5b273155c378b1d37595f2209f0a92bed Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 15 May 2024 12:44:43 +1000 Subject: [PATCH 13/59] powerpc/64: Convert patch_instruction() to patch_u32() This use of patch_instruction() is working on 32 bit data, and can fail if the data looks like a prefixed instruction and the extra write 
crosses a page boundary. Use patch_u32() to fix the write size. Fixes: 8734b41b3efe ("powerpc/module_64: Fix livepatching for RO modules") Link: https://lore.kernel.org/all/20230203004649.1f59dbd4@yea/ Signed-off-by: Benjamin Gray Tested-by: Hari Bathini Acked-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240515024445.236364-4-bgray@linux.ibm.com --- arch/powerpc/kernel/module_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7112adc597a80..e9bab599d0c27 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -651,12 +651,11 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, // func_desc_t is 8 bytes if ABIv2, else 16 bytes desc = func_desc(addr); for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { - if (patch_instruction(((u32 *)&entry->funcdata) + i, - ppc_inst(((u32 *)(&desc))[i]))) + if (patch_u32(((u32 *)&entry->funcdata) + i, ((u32 *)&desc)[i])) return 0; } - if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + if (patch_u32(&entry->magic, STUB_MAGIC)) return 0; return 1; From 5799cd765fea93e643d81dbdae76a9c34e06dd18 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 15 May 2024 12:44:44 +1000 Subject: [PATCH 14/59] powerpc/32: Convert patch_instruction() to patch_uint() These changes are for patch_instruction() uses on data. Unlike ppc64 these should not be incorrect as-is, but using the patch_uint() alias better reflects what kind of data is being patched and allows for benchmarking the effect of different patch_* implementations (e.g., skipping instruction flushing when patching data). 
Signed-off-by: Benjamin Gray Tested-by: Hari Bathini Acked-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240515024445.236364-5-bgray@linux.ibm.com --- arch/powerpc/kernel/static_call.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 863a7aa24650a..1502b7e439caf 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -17,7 +17,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) mutex_lock(&text_mutex); if (func && !is_short) { - err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target)); + err = patch_ulong(tramp + PPC_SCT_DATA, target); if (err) goto out; } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 15644be31990d..d21b681f52fb0 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -827,7 +827,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction(vector, ppc_inst(save_vector)); + patch_uint(vector, save_vector); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); From b7d47339d00d89af559a7068f4a640fc828177ad Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 15 May 2024 12:44:45 +1000 Subject: [PATCH 15/59] powerpc/code-patching: Add boot selftest for data patching Extend the code patching selftests with some basic coverage of the new data patching variants too. 
Signed-off-by: Benjamin Gray Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://msgid.link/20240515024445.236364-6-bgray@linux.ibm.com --- arch/powerpc/lib/test-code-patching.c | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c index f76030087f983..8cd3b32f805b0 100644 --- a/arch/powerpc/lib/test-code-patching.c +++ b/arch/powerpc/lib/test-code-patching.c @@ -438,6 +438,46 @@ static void __init test_multi_instruction_patching(void) vfree(buf); } +static void __init test_data_patching(void) +{ + void *buf; + u32 *addr32; + + buf = vzalloc(PAGE_SIZE); + check(buf); + if (!buf) + return; + + addr32 = buf + 128; + + addr32[1] = 0xA0A1A2A3; + addr32[2] = 0xB0B1B2B3; + + check(!patch_uint(&addr32[1], 0xC0C1C2C3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(addr32[2] == 0xB0B1B2B3); + check(addr32[3] == 0); + + /* Unaligned patch_ulong() should fail */ + if (IS_ENABLED(CONFIG_PPC64)) + check(patch_ulong(&addr32[1], 0xD0D1D2D3) == -EINVAL); + + check(!patch_ulong(&addr32[2], 0xD0D1D2D3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(*(unsigned long *)(&addr32[2]) == 0xD0D1D2D3); + + if (!IS_ENABLED(CONFIG_PPC64)) + check(addr32[3] == 0); + + check(addr32[4] == 0); + + vfree(buf); +} + static int __init test_code_patching(void) { pr_info("Running code patching self-tests ...\n"); @@ -448,6 +488,7 @@ static int __init test_code_patching(void) test_translate_branch(); test_prefixed_patching(); test_multi_instruction_patching(); + test_data_patching(); return 0; } From 0405e128110d47a40443936e68dc32d7bc4ccc0b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 26 Aug 2024 12:12:17 +0530 Subject: [PATCH 16/59] powerpc/xmon: Fix tmpstr length check in scanhex If a function name is greater than 63 characters long, xmon command may not find them. 
For example, here is a test that executed an illegal instruction in a kernel function and one of the call stack functions has a name greater than 63 characters long: cpu 0x0: Vector: 700 (Program Check) at [c00000000a6577e0] pc: c0000000001aacb8: check__allowed__function__name__for__symbol__r4+0x8/0x10 lr: c00000000019c1e0: check__allowed__function__name__for__symbol__r1+0x20/0x40 sp: c00000000a657a80 msr: 800000000288b033 current = 0xc00000000a439900 paca = 0xc000000003e90000 irqmask: 0x03 irq_happened: 0x01 ..... [link register ] c00000000019c1e0 check__allowed__function__name__for__symbol__r1+0x20/0x40 [c00000000a657a80] c00000000a439900 (unreliable) [c00000000a657aa0] c0000000001021d8 check__allowed__function__name__for__symbol__r2_resolution_symbol+0x38/0x4c [c00000000a657ac0] c00000000019b424 power_pmu_event_init+0xa4/0xa50 and when executing a dump instruction (di) command for a long function name, xmon fails to find the function symbol: 0:mon> di $check__allowed__function__name__for__symbol__r2_resolution_symbol unknown symbol 'check__allowed__function__name__for__symbol__r2_resolution_symb' 0000000000000000 ******** This is because in scanhex(), the tmpstr loop index is checked only against an upper bound of 63. Fix it by replacing the upper bound value with (KSYM_NAME_LEN-1). With fix: 0:mon> di $check__allowed__function__name__for__symbol__r2_resolution_symbol c0000000001021a0 3c4c0249 addis r2,r12,585 c0000000001021a4 3842ae60 addi r2,r2,-20896 c0000000001021a8 7c0802a6 mflr r0 c0000000001021ac 60000000 nop ..... 
Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/linuxppc-dev/CANiq72=QeTgtZL4k9=4CJP6C_Hv=rh3fsn3B9S3KFoPXkyWk3w@mail.gmail.com/ Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240826064217.46658-1-maddy@linux.ibm.com --- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bd4813bad317e..e6cddbb2305f8 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3543,7 +3543,7 @@ scanhex(unsigned long *vp) } } else if (c == '$') { int i; - for (i=0; i<63; i++) { + for (i = 0; i < (KSYM_NAME_LEN - 1); i++) { c = inchar(); if (isspace(c) || c == '\0') { termch = c; From d6b34416b08895a7457c53630595ce84e4aa904c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 16:57:05 +1000 Subject: [PATCH 17/59] powerpc/configs/64s: Enable DEFERRED_STRUCT_PAGE_INIT It can speed up initialisation of page structs at boot on large machines. Signed-off-by: Michael Ellerman Link: https://msgid.link/20240820065705.660812-1-mpe@ellerman.id.au --- arch/powerpc/configs/ppc64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 544a65fda77bc..6001d580c0ddc 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -93,6 +93,7 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_ZONE_DEVICE=y CONFIG_NET=y CONFIG_PACKET=y From 8ae4f16f7d7b59cca55aeca6db7c9636ffe7fbaa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Aug 2024 18:07:29 +1000 Subject: [PATCH 18/59] powerpc/64s/mm: Move __real_pte stubs into hash-4k.h The stub versions of __real_pte() etc are only used with HPT & 4K pages, so move them into the hash-4k.h header. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20240821080729.872034-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 20 +++++++++++++++ arch/powerpc/include/asm/book3s/64/pgtable.h | 26 -------------------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index c654c376ef8b8..c3efacab4b941 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -74,6 +74,26 @@ #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) +/* + * With 4K page size the real_pte machinery is all nops. + */ +#define __real_pte(e, p, o) ((real_pte_t){(e)}) +#define __rpte_to_pte(r) ((r).pte) +#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) + +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + index = 0; \ + shift = mmu_psize_defs[psize].shift; \ + +#define pte_iterate_hashed_end() } while(0) + +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K + /* * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just * a matter of returning the PTE bits that need to be modified. 
On 64K PTE, diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 519b1743a0f4d..f8ba72573caeb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -330,32 +330,6 @@ static inline unsigned long pud_leaf_size(pud_t pud) #ifndef __ASSEMBLY__ -/* - * This is the default implementation of various PTE accessors, it's - * used in all cases except Book3S with 64K pages where we have a - * concept of sub-pages - */ -#ifndef __real_pte - -#define __real_pte(e, p, o) ((real_pte_t){(e)}) -#define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) - -#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ - do { \ - index = 0; \ - shift = mmu_psize_defs[psize].shift; \ - -#define pte_iterate_hashed_end() } while(0) - -/* - * We expect this to be called only for user addresses or kernel virtual - * addresses other than the linear mapping. - */ -#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K - -#endif /* __real_pte */ - static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long clr, unsigned long set, int huge) From 197116e2dec8d23888ce76044fe673480afceff0 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Thu, 22 Aug 2024 16:54:29 +0800 Subject: [PATCH 19/59] powerpc/powermac/pfunc_base: Use helper function for_each_child_of_node() for_each_child_of_node() can help to iterate through the device_node, and we don't need to do it manually. No functional change with this conversion. 
Signed-off-by: Zhang Zekun Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822085430.25753-2-zhangzekun11@huawei.com --- arch/powerpc/platforms/powermac/pfunc_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 085e0ad20eba5..8253de7373735 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -313,7 +313,7 @@ static void __init uninorth_install_pfunc(void) /* * Install handlers for the hwclock child if any */ - for (np = NULL; (np = of_get_next_child(uninorth_node, np)) != NULL;) + for_each_child_of_node(uninorth_node, np) if (of_node_name_eq(np, "hw-clock")) { unin_hwclock = np; break; From 46f4bbb8aac2b876355cdefdacd1971b65f8b631 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Thu, 22 Aug 2024 16:54:30 +0800 Subject: [PATCH 20/59] powerpc/pseries/dlpar: Use helper function for_each_child_of_node() for_each_child_of_node can help to iterate through the device_node, and we don't need to use while loop. No functional change with this conversion. 
Signed-off-by: Zhang Zekun Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822085430.25753-3-zhangzekun11@huawei.com --- arch/powerpc/platforms/pseries/dlpar.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 47f8eabd1bee3..ee47ed21b99d0 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -250,11 +250,8 @@ int dlpar_detach_node(struct device_node *dn) struct device_node *child; int rc; - child = of_get_next_child(dn, NULL); - while (child) { + for_each_child_of_node(dn, child) dlpar_detach_node(child); - child = of_get_next_child(dn, child); - } rc = of_detach_node(dn); if (rc) From dace02a9ee1921adee05bf1807a78f92ee2dea2b Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Aug 2024 21:06:06 +0800 Subject: [PATCH 21/59] powerpc: Remove obsoleted declaration for _get_SP The implementation of _get_SP() was removed in commit f4db196717c6 ("[POWERPC] Remove _get_SP"), remove the now obsolete declaration. Signed-off-by: Gaosheng Cui Reviewed-by: Christophe Leroy [mpe: Update change log to refer to correct commit per Christophe] Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822130609.786431-2-cuigaosheng1@huawei.com --- arch/powerpc/kernel/process.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3b506d4c55f37..e7b70c2cc0013 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -72,8 +72,6 @@ #define TM_DEBUG(x...) do { } while(0) #endif -extern unsigned long _get_SP(void); - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Are we running in "Suspend disabled" mode? 
If so we have to block any From 6745c5bb2e0fe513918ce2136108a2efb92bdea1 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Aug 2024 21:06:07 +0800 Subject: [PATCH 22/59] powerpc/maple: Remove obsoleted declaration for maple_calibrate_decr() The maple_calibrate_decr() has been removed since commit 10f7e7c15e6c ("[PATCH] ppc64: consolidate calibrate_decr implementations"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822130609.786431-3-cuigaosheng1@huawei.com --- arch/powerpc/platforms/maple/maple.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h index 4f358b55c3413..8ddbaa4ebd0b4 100644 --- a/arch/powerpc/platforms/maple/maple.h +++ b/arch/powerpc/platforms/maple/maple.h @@ -7,7 +7,6 @@ extern int maple_set_rtc_time(struct rtc_time *tm); extern void maple_get_rtc_time(struct rtc_time *tm); extern time64_t maple_get_boot_time(void); -extern void maple_calibrate_decr(void); extern void maple_pci_init(void); extern void maple_pci_irq_fixup(struct pci_dev *dev); extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); From fe16a749731e86d580acf8d43b0298dfe6d1503d Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Aug 2024 21:06:08 +0800 Subject: [PATCH 23/59] powerpc/pasemi: Remove obsoleted declaration for pas_pci_irq_fixup() The pas_pci_irq_fixup() has been removed since commit 771f7404a9de ("pasemi_mac: Move the IRQ mapping from the PCI layer to the driver"), and now it is useless, so remove it.
Signed-off-by: Gaosheng Cui Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822130609.786431-4-cuigaosheng1@huawei.com --- arch/powerpc/platforms/pasemi/pasemi.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 018c30665e1b3..6f6743b8e48d1 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -5,7 +5,6 @@ extern time64_t pas_get_boot_time(void); extern void pas_pci_init(void); struct pci_dev; -extern void pas_pci_irq_fixup(struct pci_dev *dev); extern void pas_pci_dma_dev_setup(struct pci_dev *dev); void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); From 600d6a7e630e970624911624eb15986245b18668 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Aug 2024 21:06:09 +0800 Subject: [PATCH 24/59] powerpc: Remove obsoleted declarations for use_cop and drop_cop The use_cop() and drop_cop() have been removed since commit 6ff4d3e96652 ("powerpc: Remove old unused icswx based coprocessor support"), now they are useless, so remove them. 
Signed-off-by: Gaosheng Cui Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822130609.786431-5-cuigaosheng1@huawei.com --- arch/powerpc/include/asm/mmu_context.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 37bffa0f79183..99707456c2cd7 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -116,9 +116,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) } #endif -extern int use_cop(unsigned long acop, struct mm_struct *mm); -extern void drop_cop(unsigned long acop, struct mm_struct *mm); - #ifdef CONFIG_PPC_BOOK3S_64 static inline void inc_mm_active_cpus(struct mm_struct *mm) { From 10c8ac13395a087c90ba6acd11f793588ba5609e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Aug 2024 21:00:43 +0800 Subject: [PATCH 25/59] powerpc/powernv/pci: Remove obsoleted declaration for pnv_pci_init_ioda_hub The pnv_pci_init_ioda_hub() has been removed since commit 5ac129cdb50b ("powerpc/powernv/pci: Remove ioda1 support"), and now it is useless, so remove it.
Signed-off-by: Gaosheng Cui Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822130043.783756-1-cuigaosheng1@huawei.com --- arch/powerpc/platforms/powernv/pci.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 957f2b47a3c0c..93fba1f8661f9 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -274,7 +274,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); extern struct iommu_table *pnv_pci_table_alloc(int nid); -extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); From f9f2bff64c2f0dbee57be3d8c2741357ad3d05e6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:45 +0200 Subject: [PATCH 26/59] powerpc/8xx: Fix initial memory mapping Commit cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages") introduced an initial mapping of kernel TEXT using PAGE_KERNEL_TEXT, but the pages that contain kernel TEXT may also contain kernel RODATA, and depending on selected debug options PAGE_KERNEL_TEXT may be either RWX or ROX. RODATA must be writable during init because it also contains ro_after_init data. So use PAGE_KERNEL_X instead to be sure it is RWX. 
Fixes: cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/dac7a828d8497c4548c91840575a706657baa4f1.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 388bba0ab3e7d..15d918dce27d0 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -150,11 +150,11 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) mmu_mapin_immr(); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_X, true); if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_X, true); mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); } From 65a82e117ffeeab0baf6f871a1cab11a28ace183 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:46 +0200 Subject: [PATCH 27/59] powerpc/8xx: Fix kernel vs user address comparison Since commit 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text"), module exec space is below PAGE_OFFSET so not only space above PAGE_OFFSET, but space above TASK_SIZE needs to be seen as kernel space. Until now the problem went undetected because by default TASK_SIZE is 0x80000000 which means address space is determined by just checking upper address bit. But when TASK_SIZE is over 0x80000000, PAGE_OFFSET is used for comparison, leading to thinking module addresses are part of user space. Fix it by using TASK_SIZE instead of PAGE_OFFSET for address comparison.
Fixes: 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/3f574c9845ff0a023b46cb4f38d2c45aecd769bd.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ac74321b11928..c955a8196d55e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -41,12 +41,12 @@ #include "head_32.h" .macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 +#if CONFIG_TASK_SIZE <= 0x80000000 && MODULES_VADDR >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ not. \scratch, \addr #else rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, PAGE_OFFSET@h + cmpli cr0, \scratch, TASK_SIZE@h #endif .endm @@ -404,7 +404,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr1, r11, PAGE_OFFSET@h + cmpli cr1, r11, TASK_SIZE@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f From 985db026c34dfc45213649023d5505822a5dcd78 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:47 +0200 Subject: [PATCH 28/59] powerpc/8xx: Copy kernel PGD entries into all PGDIRs In order to avoid having to select PGDIR at each TLB miss based on fault address, copy kernel PGD entries into all PGDIRs in pgd_alloc(). At first it will be used for ITLB misses for kernel TEXT, then for execmem then for kernel DATA. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c6d2bf5af2ea909071a85bdca8b1f5dc2df134a8.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgalloc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index d06efac6d7aa6..4ef780b291bc3 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -19,8 +19,14 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + +#ifdef CONFIG_PPC_8xx + memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, + (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +#endif + return pgd; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) From 1a736d98c84acd38e40fff69528ce7aaa55dd22d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:48 +0200 Subject: [PATCH 29/59] Revert "powerpc/8xx: Always pin kernel text TLB" This reverts commit bccc58986a2f98e3af349c85c5f49aac7fb19ef2. When STRICT_KERNEL_RWX is selected, EXEC memory must stop where RW memory starts. When pinning iTLBs it means an 8M alignment for RW data start. That may be acceptable on boards with a lot of memory but one of my supported boards only has 32 Mbytes and this forced alignment leads to a waste of almost 4 Mbytes which is more than 10% of the total memory. So revert commit bccc58986a2f ("powerpc/8xx: Always pin kernel text TLB") but don't restore previous behaviour in ITLB miss handler as now kernel PGD entries are copied into each process PGDIR.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/01b6780b860c8043b51a1ba9d83acfc6f2dde910.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 8 ++++++++ arch/powerpc/mm/nohash/8xx.c | 3 ++- arch/powerpc/platforms/8xx/Kconfig | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c955a8196d55e..66ee0a31d99d3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -587,6 +587,10 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -683,6 +687,7 @@ SYM_FUNC_START_LOCAL(initial_mmu) blr SYM_FUNC_END(initial_mmu) +#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -704,6 +709,7 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia +#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -724,6 +730,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 +#endif LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA @@ -783,3 +790,4 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi +#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 15d918dce27d0..4c2f9d7169936 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -177,7 +177,8 @@ int mmu_mark_initmem_nx(void) if (!debug_pagealloc_enabled_or_kfence()) err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - mmu_pin_tlb(block_mapped_ram, false); + if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_pin_tlb(block_mapped_ram, false); return err; } diff --git 
a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index a14d9d8997a4f..8623aebfac482 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -195,6 +195,13 @@ config PIN_TLB_IMMR CONFIG_PIN_TLB_DATA is also selected, it will reduce CONFIG_PIN_TLB_DATA to 24 Mbytes. +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y + help + This pins kernel text with 8M pages. + endmenu endmenu From bcf77a70c4ffc9b01044229de87f5b6f9c1f7913 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:49 +0200 Subject: [PATCH 30/59] powerpc/8xx: Allow setting DATA alignment even with STRICT_KERNEL_RWX It is now possible to not pin kernel text with a 8Mbytes TLB, so the alignment for STRICT_KERNEL_RWX can be relaxed. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/d0d8b05012b392dd166cfd911f14ba2741ce7e1e.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d7b09b064a8ac..3c202785a1464 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -853,8 +853,8 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \ - PPC_85xx + depends on (PPC_8xx && !PIN_TLB_DATA && (!STRICT_KERNEL_RWX || !PIN_TLB_TEXT)) || \ + PPC_BOOK3S_32 || PPC_85xx help This option allows you to set the kernel data alignment. 
When RAM is mapped by blocks, the alignment needs to fit the size and @@ -870,9 +870,9 @@ config DATA_SHIFT range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 - default 23 if STRICT_KERNEL_RWX && PPC_8xx - default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA - default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx + default 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && \ + (PIN_TLB_DATA || PIN_TLB_TEXT) + default 19 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default 24 if STRICT_KERNEL_RWX && PPC_85xx default PAGE_SHIFT help From c5eec4df25c34f4bee8c757ed157f5d96eaba554 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:50 +0200 Subject: [PATCH 31/59] powerpc/8xx: Reduce default size of module/execmem area 8xx boards don't have much memory, the two I know have respectively 32Mbytes and 128Mbytes, so there is no point in having 256 Mbytes of memory for module text. Reduce it to 32Mbytes for 8xx, that's more than enough. Nevertheless, make it a configurable value so that it can be customised if needed. Also add a build verification for overlap of module execmem space with user PMD. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/8db23b61e33a0d1913d814f94bfe71ba7ac78b0f.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 18 ++++++++++++++++++ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 3 ++- arch/powerpc/mm/nohash/8xx.c | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3c202785a1464..f050a37aa857c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1271,6 +1271,24 @@ config TASK_SIZE default "0x80000000" if PPC_8xx default "0xb0000000" if PPC_BOOK3S_32 default "0xc0000000" + +config MODULES_SIZE_BOOL + bool "Set custom size for modules/execmem area" + depends on EXECMEM && ADVANCED_OPTIONS + depends on PPC_8xx + help + This option allows you to set the size of kernel virtual address + space dedicated for modules/execmem. + For the time being it is only for 8xx. + + Say N here unless you know what you are doing. + +config MODULES_SIZE + int "Size of modules/execmem area (In Mbytes)" if MODULES_SIZE_BOOL + range 1 256 if EXECMEM + default 32 if EXECMEM && PPC_8xx + default 0 + endmenu if PPC64 diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index a756a1e59c54d..2986f9ba40b88 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -170,8 +170,9 @@ #define mmu_linear_psize MMU_PAGE_8M -#define MODULES_VADDR (PAGE_OFFSET - SZ_256M) #define MODULES_END PAGE_OFFSET +#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) +#define MODULES_VADDR (MODULES_END - MODULES_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 4c2f9d7169936..8b54f12d1889b 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -207,6 +207,8 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, /* 8xx can only access 32MB at the 
moment */ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); + + BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE) < TASK_SIZE); } int pud_clear_huge(pud_t *pud) From 16a71c045186a11c1c743934e330de78162b86dd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:51 +0200 Subject: [PATCH 32/59] powerpc/8xx: Preallocate execmem page tables Preallocate execmem page tables before creating new PGDs so that all PGD entries related to execmem can be copied in pgd_alloc(). On 8xx there are 32 Mbytes for execmem by default so this will use 32 kbytes. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/a7180cc1ba59dec4502af39b4e9f3ff91c57280d.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab2012..7a5af64f165d6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -412,6 +412,18 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; +#ifdef CONFIG_PPC_8xx +static void prealloc_execmem_pgtable(void) +{ + unsigned long va; + + for (va = ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE); va < MODULES_END; va += PGDIR_SIZE) + pte_alloc_kernel(pmd_off_k(va), va); +} +#else +static void prealloc_execmem_pgtable(void) { } +#endif + struct execmem_info __init *execmem_arch_setup(void) { pgprot_t kprobes_prot = strict_module_rwx_enabled() ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; @@ -443,6 +455,8 @@ struct execmem_info __init *execmem_arch_setup(void) end = VMALLOC_END; #endif + prealloc_execmem_pgtable(); + execmem_info = (struct execmem_info){ .ranges = { [EXECMEM_DEFAULT] = { From 33c527522f394f63cc589a6f7af990b2232444c8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:52 +0200 Subject: [PATCH 33/59] powerpc/8xx: Inconditionally use task PGDIR in ITLB misses Now that modules exec page tables are preallocated, the instruction TLBmiss handler can use task PGDIR inconditionally. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/774fd766a8b9bcb9173b5e677d5dad0df2d3970f.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 66ee0a31d99d3..f9a05648a5229 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -199,18 +199,7 @@ instruction_counter: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_EXECMEM - mfcr r11 - compare_to_kernel_boundary r10, r10 -#endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_EXECMEM - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 -#endif lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 mfspr r10, SPRN_MD_TWC From ac9f97ff8b324905d457f2694490c63b9deccbc6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:53 +0200 Subject: [PATCH 34/59] powerpc/8xx: Inconditionally use task PGDIR in DTLB misses At the time being, DATA TLB miss handlers use task PGDIR for user addresses and swapper_pg_dir for kernel addresses. 
Now that kernel part of swapper_pg_dir is copied into task PGDIR at PGD allocation, it is possible to avoid the above logic and always use task PGDIR. But new kernel PGD entries can still be created after init, in which case those PGD entries may miss in task PGDIR. This can be handled in DATA TLB error handler. However, it needs to be done in real mode because the missing entry might be related to the stack. So implement copy of missing PGD entry in the prolog of DATA TLB ERROR handler just after the fixup of DAR. Note that this is feasible because 8xx doesn't implement vmap or ioremap with 8Mbytes pages but only 512kbytes pages which are at PTE level. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/7a76a923d2a111f1d843d8b20b4df0c65d2f4a7b.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 57 ++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index f9a05648a5229..811a7130505cd 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,16 +40,6 @@ #include "head_32.h" -.macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && MODULES_VADDR >= 0x80000000 -/* By simply checking Address >= 0x80000000, we know if its a kernel address */ - not. \scratch, \addr -#else - rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, TASK_SIZE@h -#endif -.endm - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -237,19 +227,12 @@ instruction_counter: START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 - mfcr r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. 
*/ mfspr r10, SPRN_MD_EPN - compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 @@ -321,15 +304,19 @@ instruction_counter: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ + mfspr r11, SPRN_DSISR + rlwinm r11, r11, 0, DSISR_NOHPTE + cmpwi cr1, r11, 0 + beq+ cr1, .Ldtlbie + mfspr r11, SPRN_DAR + tlbie r11 + rlwinm r11, r11, 16, 0xffff + cmplwi cr1, r11, TASK_SIZE@h + bge- cr1, FixupPGD +.Ldtlbie: EXCEPTION_PROLOG_1 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - andis. r10,r5,DSISR_NOHPTE@h - beq+ .Ldtlbie - tlbie r4 -.Ldtlbie: prepare_transfer_to_handler bl do_page_fault b interrupt_return @@ -383,6 +370,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */ __HEAD . = 0x2000 +FixupPGD: + mtspr SPRN_M_TW, r10 + mfspr r10, SPRN_DAR + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table */ + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + bne cr1, 1f + + rlwinm r10, r11, 0, 20, 31 + oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + beq cr1, 1f + stw r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Set the level 1 entry */ + mfspr r10, SPRN_M_TW + mtcr r10 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + rfi +1: + mfspr r10, SPRN_M_TW + b .Ldtlbie + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. 
* DAR is set to the calculated address. From 2f2b9a3adc66e978a1248ffb38df8477e8e97c57 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:54 +0200 Subject: [PATCH 35/59] powerpc/32s: Reduce default size of module/execmem area book3s/32 platforms have usually more memory than 8xx, but it is still not worth reserving a full segment (256 Mbytes) for module text. 64Mbytes should be far enough. Also fix TASK_SIZE when EXECMEM is not selected, and add a build verification for overlap of module execmem space with user segments. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c1f6a4e47f177d919561c6e97d31af5564923cf6.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 7 ++++--- arch/powerpc/include/asm/book3s/32/pgtable.h | 3 ++- arch/powerpc/mm/book3s32/mmu.c | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f050a37aa857c..b9f11c2625825 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1269,23 +1269,24 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 + default "0xb0000000" if PPC_BOOK3S_32 && EXECMEM default "0xc0000000" config MODULES_SIZE_BOOL bool "Set custom size for modules/execmem area" depends on EXECMEM && ADVANCED_OPTIONS - depends on PPC_8xx help This option allows you to set the size of kernel virtual address space dedicated for modules/execmem. - For the time being it is only for 8xx. + For the time being it is only for 8xx and book3s/32. Other + platform share it with vmalloc space. Say N here unless you know what you are doing. 
config MODULES_SIZE int "Size of modules/execmem area (In Mbytes)" if MODULES_SIZE_BOOL range 1 256 if EXECMEM + default 64 if EXECMEM && PPC_BOOK3S_32 default 32 if EXECMEM && PPC_8xx default 0 diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 52971ee30717f..42c3af90d1f0f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -196,7 +196,8 @@ void unmap_kernel_page(unsigned long va); #endif #define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) -#define MODULES_VADDR (MODULES_END - SZ_256M) +#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) +#define MODULES_VADDR (MODULES_END - MODULES_SIZE) #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 625fe7d08e067..2db167f4233f7 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -223,6 +223,8 @@ int mmu_mark_initmem_nx(void) update_bats(); + BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, SZ_256M) < TASK_SIZE); + for (i = TASK_SIZE >> 28; i < 16; i++) { /* Do not set NX on VM space for modules */ if (is_module_segment(i << 28)) From 82ef440f9a38a1fd7f4854397633a35af33840a5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:55 +0200 Subject: [PATCH 36/59] powerpc/603: Copy kernel PGD entries into all PGDIRs and preallocate execmem page tables For the same reason as 8xx, copy kernel PGD entries into all PGDIRs in pgd_alloc() and preallocate execmem page tables before creating new PGDs so that all PGD entries related to execmem are copied by pgd_alloc(). This will help reduce the fast-path in TLBmiss handlers. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/1a0d1feee07c4cf955f6a43a704c203e5c90fa53.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgalloc.h | 2 +- arch/powerpc/mm/mem.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 4ef780b291bc3..bb5f3e8ea912d 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -22,7 +22,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); -#ifdef CONFIG_PPC_8xx +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_BOOK3S_603) memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7a5af64f165d6..a0c5a0d7b249b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -412,7 +412,7 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; -#ifdef CONFIG_PPC_8xx +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_BOOK3S_603) static void prealloc_execmem_pgtable(void) { unsigned long va; From 31c0e137ec609f36877ea39cd343ef2476d080aa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:56 +0200 Subject: [PATCH 37/59] powerpc/603: Switch r0 and r3 in TLB miss handlers In preparation of next patch that will perform some additional calculations to replace comparison, switch the use of r0 and r3 as r0 has some limitations in some instructions like 'addi/subi'. Also remove outdated comments about the meaning of each register. The registers are used for many things and it would be difficult to accurately describe all things done with a given register. 
The function is now small enough to get a global view without much description. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/566af5e87685b1a85d3182549c0d520ce2d8877a.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 94 +++++++++++----------------- 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 57196883a00e3..7995506e7fbd7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -411,39 +411,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) */ . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS + mfspr r0,SPRN_IMISS #ifdef CONFIG_EXECMEM lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + cmplw 0,r1,r0 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_EXECMEM - li r0, 3 + li r3, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 + li r3, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ +112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ andc. 
r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ #ifdef CONFIG_EXECMEM - rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ + rlwimi r2, r3, 0, 31, 31 /* userspace ? -> PP lsb */ #endif ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r2, r1 /* PP = user? 1 : 0 */ @@ -451,7 +445,7 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - tlbli r3 + tlbli r0 mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r3 rfi @@ -480,35 +474,29 @@ InstructionAddressInvalid: */ . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS + mfspr r0,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + cmplw 0,r1,r0 mfspr r2, SPRN_SDR1 li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ rlwinm r2, r2, 28, 0xfffff000 - li r0, 3 + li r3, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 + li r3, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ +112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ andc. 
r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ - rlwimi r2,r0,0,30,31 /* userspace ? -> PP */ + rlwimi r2,r3,0,30,31 /* userspace ? -> PP */ rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ @@ -518,23 +506,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) DataAddressInvalid: @@ -560,34 +548,28 @@ DataAddressInvalid: */ . 
= INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS + mfspr r0,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + cmplw 0,r1,r0 mfspr r2, SPRN_SDR1 li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED rlwinm r2, r2, 28, 0xfffff000 - li r0, 3 + li r3, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 + li r3, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ +112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r2,r0,0,31,31 /* userspace ? -> PP lsb */ + rlwimi r2,r3,0,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r2,r1 /* PP = user? 
1: 0 */ BEGIN_FTR_SECTION @@ -597,23 +579,23 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) From 3f57d90c231d3329aaed7079dd05b5a2f7692a58 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:57 +0200 Subject: [PATCH 38/59] powerpc/603: Inconditionally use task PGDIR in ITLB misses Now that modules exec page tables are preallocated, the instruction TLBmiss handler can use task PGDIR inconditionally. Also revise the identification of user vs kernel user space by doing a calculation instead of a comparison: Get the segment number and subtract the number of the first kernel segment. The result is positive for kernel addresses and negative for user addresses, which means that upper 2 bits are 0 for kernel and 3 for user. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/9a3242162ad2faab8019c698e501b326a126ee9e.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 7995506e7fbd7..156304c00ecea 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -413,22 +413,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) InstructionTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r0,SPRN_IMISS -#ifdef CONFIG_EXECMEM - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r0 -#endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ #ifdef CONFIG_EXECMEM - li r3, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r3, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwinm r3, r0, 4, 0xf + subi r3, r3, (TASK_SIZE >> 28) & 0xf #endif -112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ @@ -437,7 +430,7 @@ InstructionTLBMiss: bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ #ifdef CONFIG_EXECMEM - rlwimi r2, r3, 0, 31, 31 /* userspace ? -> PP lsb */ + rlwimi r2, r3, 1, 31, 31 /* userspace ? -> PP lsb */ #endif ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r2, r1 /* PP = user? 
1 : 0 */ From 062e825a336017c0334c7497690826c95aa1a84f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:58 +0200 Subject: [PATCH 39/59] powerpc/603: Inconditionally use task PGDIR in DTLB misses Currently, DATA TLB miss handlers use the task PGDIR for user addresses and swapper_pg_dir for kernel addresses. Now that the kernel part of swapper_pg_dir is copied into the task PGDIR at PGD allocation, it is possible to avoid the above logic and always use the task PGDIR. But new kernel PGD entries can still be created after init, in which case those PGD entries may be missing from the task PGDIR. This could be handled in the DATA TLB error handler. However, it needs to be done in real mode because the missing entry might be related to the stack. So implement the copy of the missing PGD entry in the DATA TLB miss handler, just after detection of an invalid PGD entry. Also replace the comparison by the same calculation as in the previous patch to know if an address belongs to a kernel or user segment. Note that as mentioned in platforms/Kconfig.cputype, SMP is not supported on 603 processors so there is no risk of the PGD entry being populated during the fault.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/a2ba8eeb1c845eeb9e46b6fe3a5e9f841df9a033.1724173828.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 65 ++++++++++++++++------------ 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 156304c00ecea..cb2bca76be535 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -469,27 +469,22 @@ InstructionAddressInvalid: DataLoadTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r0,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r0 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ - rlwinm r2, r2, 28, 0xfffff000 - li r3, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r3, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ - rlwimi r2,r3,0,30,31 /* userspace ? -> PP */ + rlwimi r2,r3,2,30,31 /* userspace ? 
-> PP */ rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ @@ -518,6 +513,16 @@ MMU_FTR_SECTION_ELSE tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + b 1b DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -543,26 +548,22 @@ DataAddressInvalid: DataStoreTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r0,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r0 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED - rlwinm r2, r2, 28, 0xfffff000 - li r3, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r3, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED andc. 
r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r2,r3,0,31,31 /* userspace ? -> PP lsb */ + rlwimi r2,r3,1,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r2,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION @@ -592,6 +593,16 @@ MMU_FTR_SECTION_ELSE rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm r2,r2,0,0,19 /* extract address of pte page */ + b 1b + #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception #endif From dca5b1d69aea36ab559d9ca13729370007c60df1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 14:26:54 +0200 Subject: [PATCH 40/59] powerpc/32: Implement validation of emergency stack VMAP stack added an emergency stack on powerpc/32 for when there is a stack overflow, but failed to add stack validation for that emergency stack. That validation is required for show stack. Implement it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2439d50b019f758db4a6d7b238b06441ab109799.1724156805.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/thread_info.h | 4 ++++ arch/powerpc/kernel/process.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 15c5691dd2184..6ebca2996f18f 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -226,6 +226,10 @@ static inline int arch_within_stack_frames(const void * const stack, return BAD_STACK; } +#ifdef CONFIG_PPC32 +extern void *emergency_ctx[]; +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e7b70c2cc0013..ff61a3e7984ce 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2175,10 +2175,10 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +#ifdef CONFIG_PPC64 static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long nbytes) { -#ifdef CONFIG_PPC64 unsigned long stack_page; unsigned long cpu = task_cpu(p); @@ -2206,10 +2206,26 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; # endif -#endif return 0; } +#else +static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return 0; + + stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + return 0; +} +#endif /* * validate the stack frame of a particular minimum size, used for when we are From 
65948b0e716a47382731889ee6bbb18642b8b003 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Aug 2024 10:00:29 +0200 Subject: [PATCH 41/59] powerpc/vdso: Inconditionally use CFUNC macro During merge of commit 4e991e3c16a3 ("powerpc: add CFUNC assembly label annotation") a fallback version of CFUNC macro was added at the last minute, so it can be used inconditionally. Fixes: 4e991e3c16a3 ("powerpc: add CFUNC assembly label annotation") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/0fa863f2f69b2ca4094ae066fcf1430fb31110c9.1724313540.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso/gettimeofday.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 48fc6658053aa..894cb939cd2b3 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -38,11 +38,7 @@ .else addi r4, r5, VDSO_DATA_OFFSET .endif -#ifdef __powerpc64__ bl CFUNC(DOTSYM(\funct)) -#else - bl \funct -#endif PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) From b76e0d4215b6b622127ebcceaa7f603313ceaec4 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 21 Aug 2024 19:50:26 -0700 Subject: [PATCH 42/59] powerpc/pseries: Use correct data types from pseries_hp_errorlog struct _be32 type is defined for some elements in pseries_hp_errorlog struct but also used them u32 after be32_to_cpu() conversion. Example: In handle_dlpar_errorlog() hp_elog->_drc_u.drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); And later assigned to u32 type dlpar_cpu() - u32 drc_index = hp_elog->_drc_u.drc_index; This incorrect usage is giving the following warnings and the patch resolve these warnings with the correct assignment. 
arch/powerpc/platforms/pseries/dlpar.c:398:53: sparse: sparse: incorrect type in argument 1 (different base types) @@ expected unsigned int [usertype] drc_index @@ got restricted __be32 [usertype] drc_index @@ ... arch/powerpc/platforms/pseries/dlpar.c:418:43: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __be32 [usertype] drc_count @@ got unsigned int [usertype] @@ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408182142.wuIKqYae-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202408182302.o7QRO45S-lkp@intel.com/ Signed-off-by: Haren Myneni v3: - Fix warnings from using incorrect data types in pseries_hp_errorlog struct v2: - Remove pr_info() and TODO comments - Update more information in the commit logs Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822025028.938332-1-haren@linux.ibm.com --- arch/powerpc/platforms/pseries/dlpar.c | 17 ----------------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +- arch/powerpc/platforms/pseries/hotplug-memory.c | 16 ++++++++-------- arch/powerpc/platforms/pseries/pmem.c | 2 +- 4 files changed, 10 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index ee47ed21b99d0..0777cccd6ac87 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -331,23 +331,6 @@ int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; - /* pseries error logs are in BE format, convert to cpu type */ - switch (hp_elog->id_type) { - case PSERIES_HP_ELOG_ID_DRC_COUNT: - hp_elog->_drc_u.drc_count = - be32_to_cpu(hp_elog->_drc_u.drc_count); - break; - case PSERIES_HP_ELOG_ID_DRC_INDEX: - hp_elog->_drc_u.drc_index = - be32_to_cpu(hp_elog->_drc_u.drc_index); - break; - case PSERIES_HP_ELOG_ID_DRC_IC: - hp_elog->_drc_u.ic.count = - be32_to_cpu(hp_elog->_drc_u.ic.count); - hp_elog->_drc_u.ic.index = - 
be32_to_cpu(hp_elog->_drc_u.ic.index); - } - switch (hp_elog->resource) { case PSERIES_HP_ELOG_RESOURCE_MEM: rc = dlpar_memory(hp_elog); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e62835a12d73f..6838a0fcda296 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -757,7 +757,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) u32 drc_index; int rc; - drc_index = hp_elog->_drc_u.drc_index; + drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); lock_device_hotplug(); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3fe3ddb30c04b..38dc4f7c9296b 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -817,16 +817,16 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_ADD: switch (hp_elog->id_type) { case PSERIES_HP_ELOG_ID_DRC_COUNT: - count = hp_elog->_drc_u.drc_count; + count = be32_to_cpu(hp_elog->_drc_u.drc_count); rc = dlpar_memory_add_by_count(count); break; case PSERIES_HP_ELOG_ID_DRC_INDEX: - drc_index = hp_elog->_drc_u.drc_index; + drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); rc = dlpar_memory_add_by_index(drc_index); break; case PSERIES_HP_ELOG_ID_DRC_IC: - count = hp_elog->_drc_u.ic.count; - drc_index = hp_elog->_drc_u.ic.index; + count = be32_to_cpu(hp_elog->_drc_u.ic.count); + drc_index = be32_to_cpu(hp_elog->_drc_u.ic.index); rc = dlpar_memory_add_by_ic(count, drc_index); break; default: @@ -838,16 +838,16 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: switch (hp_elog->id_type) { case PSERIES_HP_ELOG_ID_DRC_COUNT: - count = hp_elog->_drc_u.drc_count; + count = be32_to_cpu(hp_elog->_drc_u.drc_count); rc = dlpar_memory_remove_by_count(count); break; case PSERIES_HP_ELOG_ID_DRC_INDEX: - drc_index = 
hp_elog->_drc_u.drc_index; + drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); rc = dlpar_memory_remove_by_index(drc_index); break; case PSERIES_HP_ELOG_ID_DRC_IC: - count = hp_elog->_drc_u.ic.count; - drc_index = hp_elog->_drc_u.ic.index; + count = be32_to_cpu(hp_elog->_drc_u.ic.count); + drc_index = be32_to_cpu(hp_elog->_drc_u.ic.index); rc = dlpar_memory_remove_by_ic(count, drc_index); break; default: diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index 3c290b9ed01b3..0f1d45f32e4a4 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -121,7 +121,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return -EINVAL; } - drc_index = hp_elog->_drc_u.drc_index; + drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); lock_device_hotplug(); From 17a51171c20d590d3d3c632bcdd946f5fc3c0061 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 21 Aug 2024 19:50:27 -0700 Subject: [PATCH 43/59] powerpc/pseries/dlpar: Remove device tree node for DLPAR IO remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the powerpc-pseries specific implementation, the IO hotplug event is handled in the user space (drmgr tool). But update the device tree and /dev/mem access to allocate buffers for some RTAS calls are restricted when the kernel lockdown feature is enabled. For the DLPAR IO REMOVE, the corresponding device tree nodes and properties have to be removed from the device tree after the device disable. The user space removes the device tree nodes by updating /proc/ppc64/ofdt which is not allowed under system lockdown is enabled. This restriction can be resolved by moving the complete IO hotplug handling in the kernel. But the pseries implementation need user interaction to power off and to remove device from the slot during hotplug event handling. 
To overcome the /proc/ppc64/ofdt restriction, this patch extends the /sys/kernel/dlpar interface and provides ‘dt remove index <drc_index>’ to the user space so that the drmgr tool can remove the corresponding device tree nodes based on DRC index from the device tree. Signed-off-by: Scott Cheloha Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822025028.938332-2-haren@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/platforms/pseries/dlpar.c | 88 +++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 065ffd1b2f8ad..04406162fc5ac 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -397,6 +397,7 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) #define PSERIES_HP_ELOG_RESOURCE_SLOT 3 #define PSERIES_HP_ELOG_RESOURCE_PHB 4 #define PSERIES_HP_ELOG_RESOURCE_PMEM 6 +#define PSERIES_HP_ELOG_RESOURCE_DT 7 #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 0777cccd6ac87..6573eb0e1c91e 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -327,6 +327,87 @@ int dlpar_unisolate_drc(u32 drc_index) return 0; } +static int changeset_detach_node_recursive(struct of_changeset *ocs, + struct device_node *node) +{ + struct device_node *child; + int rc; + + for_each_child_of_node(node, child) { + rc = changeset_detach_node_recursive(ocs, child); + if (rc) { + of_node_put(child); + return rc; + } + } + + return of_changeset_detach_node(ocs, node); +} + +static int dlpar_hp_dt_remove(u32 drc_index) +{ + struct device_node *np; + struct of_changeset ocs; + u32 index; + int rc = 0; + + /* + * Prune all nodes with a matching index.
+ */ + of_changeset_init(&ocs); + + for_each_node_with_property(np, "ibm,my-drc-index") { + rc = of_property_read_u32(np, "ibm,my-drc-index", &index); + if (rc) { + pr_err("%s: %pOF: of_property_read_u32 %s: %d\n", + __func__, np, "ibm,my-drc-index", rc); + of_node_put(np); + goto out; + } + + if (index == drc_index) { + rc = changeset_detach_node_recursive(&ocs, np); + if (rc) { + of_node_put(np); + goto out; + } + } + } + + rc = of_changeset_apply(&ocs); + +out: + of_changeset_destroy(&ocs); + return rc; +} + +static int dlpar_hp_dt(struct pseries_hp_errorlog *phpe) +{ + u32 drc_index; + int rc; + + if (phpe->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) + return -EINVAL; + + drc_index = be32_to_cpu(phpe->_drc_u.drc_index); + + lock_device_hotplug(); + + switch (phpe->action) { + case PSERIES_HP_ELOG_ACTION_REMOVE: + rc = dlpar_hp_dt_remove(drc_index); + break; + default: + pr_err("Invalid action (%d) specified\n", phpe->action); + rc = -EINVAL; + break; + } + + unlock_device_hotplug(); + + return rc; +} + int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; @@ -341,6 +422,9 @@ int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_RESOURCE_PMEM: rc = dlpar_hp_pmem(hp_elog); break; + case PSERIES_HP_ELOG_RESOURCE_DT: + rc = dlpar_hp_dt(hp_elog); + break; default: pr_warn_ratelimited("Invalid resource (%d) specified\n", @@ -393,6 +477,8 @@ static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog) hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; } else if (sysfs_streq(arg, "cpu")) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU; + } else if (sysfs_streq(arg, "dt")) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_DT; } else { pr_err("Invalid resource specified.\n"); return -EINVAL; @@ -534,7 +620,7 @@ static ssize_t dlpar_store(const struct class *class, const struct class_attribu static ssize_t dlpar_show(const struct class *class, const struct class_attribute *attr, char *buf) { - return 
sprintf(buf, "%s\n", "memory,cpu"); + return sprintf(buf, "%s\n", "memory,cpu,dt"); } static CLASS_ATTR_RW(dlpar); From 02b98ff44a57c1376c5a92a8518fda5c82bb5a91 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 21 Aug 2024 19:50:28 -0700 Subject: [PATCH 44/59] powerpc/pseries/dlpar: Add device tree nodes for DLPAR IO add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the powerpc-pseries specific implementation, the IO hotplug event is handled in the user space (drmgr tool). For the DLPAR IO ADD, the corresponding device tree nodes and properties will be added to the device tree after the device enable. The user space (drmgr tool) uses configure_connector RTAS call with the DRC index to retrieve the device nodes and updates the device tree by writing to /proc/ppc64/ofdt. Under system lockdown, /dev/mem access to allocate buffers for configure_connector RTAS call is restricted which means the user space cannot issue this RTAS call and also cannot access /proc/ppc64/ofdt. The pseries implementation needs user interaction to power-on and add device to the slot during the ADD event handling. So moving the complete hotplug ADD event handling into the kernel would add complexity. To overcome /dev/mem access restriction, this patch extends the /sys/kernel/dlpar interface and provides ‘dt add index <drc_index>’ to the user space. The drmgr tool uses this interface to update the device tree whenever the device is added. This interface retrieves device tree nodes for the corresponding DRC index using the configure_connector RTAS call and adds new device nodes / properties to the device tree.
Signed-off-by: Scott Cheloha Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://msgid.link/20240822025028.938332-3-haren@linux.ibm.com --- arch/powerpc/platforms/pseries/dlpar.c | 130 +++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6573eb0e1c91e..213aa26dc8b33 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -23,6 +23,7 @@ #include #include #include +#include static struct workqueue_struct *pseries_hp_wq; @@ -261,6 +262,20 @@ int dlpar_detach_node(struct device_node *dn) return 0; } +static int dlpar_changeset_attach_cc_nodes(struct of_changeset *ocs, + struct device_node *dn) +{ + int rc; + + rc = of_changeset_attach_node(ocs, dn); + + if (!rc && dn->child) + rc = dlpar_changeset_attach_cc_nodes(ocs, dn->child); + if (!rc && dn->sibling) + rc = dlpar_changeset_attach_cc_nodes(ocs, dn->sibling); + + return rc; +} #define DR_ENTITY_SENSE 9003 #define DR_ENTITY_PRESENT 1 @@ -327,6 +342,118 @@ int dlpar_unisolate_drc(u32 drc_index) return 0; } +static struct device_node * +get_device_node_with_drc_index(u32 index) +{ + struct device_node *np = NULL; + u32 node_index; + int rc; + + for_each_node_with_property(np, "ibm,my-drc-index") { + rc = of_property_read_u32(np, "ibm,my-drc-index", + &node_index); + if (rc) { + pr_err("%s: %pOF: of_property_read_u32 %s: %d\n", + __func__, np, "ibm,my-drc-index", rc); + of_node_put(np); + return NULL; + } + + if (index == node_index) + break; + } + + return np; +} + +static struct device_node * +get_device_node_with_drc_info(u32 index) +{ + struct device_node *np = NULL; + struct of_drc_info drc; + struct property *info; + const __be32 *value; + u32 node_index; + int i, j, count; + + for_each_node_with_property(np, "ibm,drc-info") { + info = of_find_property(np, "ibm,drc-info", NULL); + if (info == NULL) { + /* XXX can this happen? 
*/ + of_node_put(np); + return NULL; + } + value = of_prop_next_u32(info, NULL, &count); + if (value == NULL) + continue; + value++; + for (i = 0; i < count; i++) { + if (of_read_drc_info_cell(&info, &value, &drc)) + break; + if (index > drc.last_drc_index) + continue; + node_index = drc.drc_index_start; + for (j = 0; j < drc.num_sequential_elems; j++) { + if (index == node_index) + return np; + node_index += drc.sequential_inc; + } + } + } + + return NULL; +} + +static int dlpar_hp_dt_add(u32 index) +{ + struct device_node *np, *nodes; + struct of_changeset ocs; + int rc; + + /* + * Do not add device node(s) if already exists in the + * device tree. + */ + np = get_device_node_with_drc_index(index); + if (np) { + pr_err("%s: Adding device node for index (%d), but " + "already exists in the device tree\n", + __func__, index); + rc = -EINVAL; + goto out; + } + + np = get_device_node_with_drc_info(index); + + if (!np) + return -EIO; + + /* Next, configure the connector. */ + nodes = dlpar_configure_connector(cpu_to_be32(index), np); + if (!nodes) { + rc = -EIO; + goto out; + } + + /* + * Add the new nodes from dlpar_configure_connector() onto + * the device-tree. 
+ */ + of_changeset_init(&ocs); + rc = dlpar_changeset_attach_cc_nodes(&ocs, nodes); + + if (!rc) + rc = of_changeset_apply(&ocs); + else + dlpar_free_cc_nodes(nodes); + + of_changeset_destroy(&ocs); + +out: + of_node_put(np); + return rc; +} + static int changeset_detach_node_recursive(struct of_changeset *ocs, struct device_node *node) { @@ -394,6 +521,9 @@ static int dlpar_hp_dt(struct pseries_hp_errorlog *phpe) lock_device_hotplug(); switch (phpe->action) { + case PSERIES_HP_ELOG_ACTION_ADD: + rc = dlpar_hp_dt_add(drc_index); + break; case PSERIES_HP_ELOG_ACTION_REMOVE: rc = dlpar_hp_dt_remove(drc_index); break; From 7509c23770054fdaffd966926462248d44a323c1 Mon Sep 17 00:00:00 2001 From: Huang Xiaojia Date: Mon, 26 Aug 2024 23:09:55 +0800 Subject: [PATCH 45/59] powerpc: Constify struct kobj_type 'struct kobj_type' is not modified. It is only used in kobject_init_and_add()/kobject_init() which takes a 'const struct kobj_type *ktype' parameter. Constifying this structure moves some data to a read-only section, so increase over all security. 
On a x86_64, compiled with ppc64 defconfig: Before: ====== text data bss dec hex filename 7145 606 0 7751 1e47 arch/powerpc/kernel/cacheinfo.o 3663 384 16 4063 fdf arch/powerpc/kernel/secvar-sysfs.o After: ====== text data bss dec hex filename 7193 558 0 7751 1e47 arch/powerpc/kernel/cacheinfo.o 3663 384 16 4063 fdf arch/powerpc/kernel/secvar-sysfs.o Signed-off-by: Huang Xiaojia Signed-off-by: Michael Ellerman Link: https://msgid.link/20240826150957.3500237-1-huangxiaojia2@huawei.com --- arch/powerpc/kernel/cacheinfo.c | 2 +- arch/powerpc/kernel/secvar-sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index f502337dd37d6..0fcc463b02e25 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -735,7 +735,7 @@ static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; -static struct kobj_type cache_index_type = { +static const struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, .default_groups = cache_index_default_groups, diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index eb3c053f323f7..fbeb1cbac01b2 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -125,7 +125,7 @@ static const struct attribute_group secvar_attr_group = { }; __ATTRIBUTE_GROUPS(secvar_attr); -static struct kobj_type secvar_ktype = { +static const struct kobj_type secvar_ktype = { .sysfs_ops = &kobj_sysfs_ops, .default_groups = secvar_attr_groups, }; From 7492ca369e98a45b55592456dc1b24d58b3392f5 Mon Sep 17 00:00:00 2001 From: Huang Xiaojia Date: Mon, 26 Aug 2024 23:09:56 +0800 Subject: [PATCH 46/59] powerpc: powernv: Constify struct kobj_type 'struct kobj_type' is not modified. It is only used in kobject_init() which takes a 'const struct kobj_type *ktype' parameter. 
Constifying this structure moves some data to a read-only section, so increase over all security. On a x86_64, compiled with ppc64 defconfig: Before: ====== text data bss dec hex filename 3775 256 8 4039 fc7 arch/powerpc/platforms/powernv/opal-dump.o 2679 260 8 2947 b83 arch/powerpc/platforms/powernv/opal-elog.o After: ====== text data bss dec hex filename 3823 208 8 4039 fc7 arch/powerpc/platforms/powernv/opal-dump.o 2727 212 8 2947 b83 arch/powerpc/platforms/powernv/opal-elog.o Signed-off-by: Huang Xiaojia Signed-off-by: Michael Ellerman Link: https://msgid.link/20240826150957.3500237-2-huangxiaojia2@huawei.com --- arch/powerpc/platforms/powernv/opal-dump.c | 2 +- arch/powerpc/platforms/powernv/opal-elog.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 16c5860f13720..608e4b68c5ea9 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -210,7 +210,7 @@ static struct attribute *dump_default_attrs[] = { }; ATTRIBUTE_GROUPS(dump_default); -static struct kobj_type dump_ktype = { +static const struct kobj_type dump_ktype = { .sysfs_ops = &dump_sysfs_ops, .release = &dump_release, .default_groups = dump_default_groups, diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 554fdd7f88b8a..5db1e733143bf 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -146,7 +146,7 @@ static struct attribute *elog_default_attrs[] = { }; ATTRIBUTE_GROUPS(elog_default); -static struct kobj_type elog_ktype = { +static const struct kobj_type elog_ktype = { .sysfs_ops = &elog_sysfs_ops, .release = &elog_release, .default_groups = elog_default_groups, From 6f2683274d0d0b51a986f73c2afb9058156f1641 Mon Sep 17 00:00:00 2001 From: Huang Xiaojia Date: Mon, 26 Aug 2024 23:09:57 +0800 Subject: [PATCH 47/59] powerpc: pseries: 
Constify struct kobj_type 'struct kobj_type' is not modified. It is only used in kobject_init() which takes a 'const struct kobj_type *ktype' parameter. Constifying this structure moves some data to a read-only section, so increase over all security. On a x86_64, compiled with ppc64 defconfig: Before: ====== text data bss dec hex filename 1885 368 16 2269 8dd arch/powerpc/platforms/pseries/vas-sysfs.o After: ====== text data bss dec hex filename 1981 272 16 2269 8dd arch/powerpc/platforms/pseries/vas-sysfs.o Signed-off-by: Huang Xiaojia Signed-off-by: Michael Ellerman Link: https://msgid.link/20240826150957.3500237-3-huangxiaojia2@huawei.com --- arch/powerpc/platforms/pseries/vas-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index f9f682724e776..9e05a0e99cadf 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -162,13 +162,13 @@ static const struct sysfs_ops vas_sysfs_ops = { .store = vas_type_store, }; -static struct kobj_type vas_def_attr_type = { +static const struct kobj_type vas_def_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, .default_groups = vas_def_capab_groups, }; -static struct kobj_type vas_qos_attr_type = { +static const struct kobj_type vas_qos_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, .default_groups = vas_qos_capab_groups, From 19f1bc3fb55452739dd3d56cfd06c29ecdbe3e9f Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Fri, 30 Aug 2024 07:31:31 -0400 Subject: [PATCH 48/59] powerpc: Replace kretprobe code with rethook on powerpc This is an adaptation of commit f3a112c0c40d ("x86,rethook,kprobes: Replace kretprobe with rethook on x86") to powerpc. Rethook follows the existing kretprobe implementation, but separates it from kprobes so that it can be used by fprobe (ftrace-based function entry/exit probes). 
As such, this patch also enables fprobe to work on powerpc. The only other change compared to the existing kretprobe implementation is doing the return address fixup in arch_rethook_fixup_return(). Reference to other archs: commit b57c2f124098 ("riscv: add riscv rethook implementation") commit 7b0a096436c2 ("LoongArch: Replace kretprobe with rethook") Note: ===== In the future, rethook will be only for kretprobe, and kretprobe will be replaced by fprobe. https://lore.kernel.org/all/172000134410.63468.13742222887213469474.stgit@devnote2/ We will adapt the above implementation for powerpc once it's upstream. Until then, we can have this implementation of rethook to serve current kretprobe use cases. Reviewed-by: Naveen Rao Signed-off-by: Abhishek Dubey Signed-off-by: Michael Ellerman Link: https://msgid.link/20240830113131.7597-1-adubey@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/kprobes.c | 65 +--------------------------- arch/powerpc/kernel/optprobes.c | 2 +- arch/powerpc/kernel/rethook.c | 73 ++++++++++++++++++++++++++++++++ arch/powerpc/kernel/stacktrace.c | 6 ++- 6 files changed, 81 insertions(+), 67 deletions(-) create mode 100644 arch/powerpc/kernel/rethook.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b9f11c2625825..1f9d23b276b58 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -269,6 +269,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RETHOOK if KPROBES select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1784b6a6ca1dd..f43c1198768c6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -139,6 +139,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES)
+= uprobes.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 14c5ddec30563..f8aa91bc3b175 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -228,16 +228,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->link; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->link = (unsigned long)__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; @@ -394,49 +384,6 @@ int kprobe_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_handler); -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -asm(".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" - "nop\n" - "blr\n" - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"); - -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - /* - * We get here through one of two paths: - * 1. by taking a trap -> kprobe_handler() -> here - * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here - * - * When going back through (1), we need regs->nip to be setup properly - * as it is used to determine the return address from the trap. 
- * For (2), since nip is not honoured with optprobes, we instead setup - * the link register properly so that the subsequent 'blr' in - * __kretprobe_trampoline jumps back to the right instruction. - * - * For nip, we should set the address to the previous instruction since - * we end up emulating it in kprobe_handler(), which increments the nip - * again. - */ - regs_set_return_ip(regs, orig_ret_address - 4); - regs->link = orig_ret_address; - - return 0; -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - /* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" @@ -539,19 +486,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) } NOKPROBE_SYMBOL(kprobe_fault_handler); -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) -{ - return register_kprobe(&trampoline_p); -} - int arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return 1; return 0; diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 004fae2044a3e..c0b351d61058f 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p) * has a 'nop' instruction, which can be emulated. * So further checks can be skipped. */ - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return addr + sizeof(kprobe_opcode_t); /* diff --git a/arch/powerpc/kernel/rethook.c b/arch/powerpc/kernel/rethook.c new file mode 100644 index 0000000000000..5f5f47ae82cfa --- /dev/null +++ b/arch/powerpc/kernel/rethook.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PowerPC implementation of rethook. 
This depends on kprobes. + */ + +#include +#include + +/* + * Function return trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +asm(".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" + "nop\n" + "blr\n" + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n"); + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int trampoline_rethook_handler(struct kprobe *p, struct pt_regs *regs) +{ + return !rethook_trampoline_handler(regs, regs->gpr[1]); +} +NOKPROBE_SYMBOL(trampoline_rethook_handler); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->link; + rh->frame = regs->gpr[1]; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, unsigned long orig_ret_address) +{ + /* + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. + * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * arch_rethook_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. 
+ */ + regs_set_return_ip(regs, orig_ret_address - 4); + regs->link = orig_ret_address; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &arch_rethook_trampoline, + .pre_handler = trampoline_rethook_handler +}; + +/* rethook initializer */ +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e6a958a5da276..90882b5175cd4 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -133,12 +134,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * arch-dependent code, they are generic. */ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); -#ifdef CONFIG_KPROBES + /* * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (ip == (unsigned long)__kretprobe_trampoline) +#ifdef CONFIG_RETHOOK + if (ip == (unsigned long)arch_rethook_trampoline) return -EINVAL; #endif From 8589cdf0cf0b0a03d2285fc46ada2f28113f8620 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Aug 2024 18:07:45 +1000 Subject: [PATCH 49/59] powerpc/64s: Make mmu_hash_ops __ro_after_init The mmu_hash_ops are only assigned to during boot, so mark them __ro_after_init to prevent any further modification. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20240821080745.872151-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6727a15ab94f9..e1eadd03f1339 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -125,7 +125,7 @@ int mmu_ci_restrictions; #endif static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; -struct mmu_hash_ops mmu_hash_ops; +struct mmu_hash_ops mmu_hash_ops __ro_after_init; EXPORT_SYMBOL(mmu_hash_ops); /* From f61d413a1c1feaa4cd04fca840564ab90124bec4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Aug 2024 13:29:10 +1000 Subject: [PATCH 50/59] powerpc/mm/64s: Move THP reqs into a separate symbol Move the Kconfig symbols related to transparent hugepages (THP) under a separate config symbol, separate from CONFIG_PPC_BOOK3S_64. The new symbol is automatically enabled if CONFIG_PPC_BOOK3S_64 is enabled, so there is no behaviour change, except for the existence of the new PPC_THP symbol. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20240823032911.1238471-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 4b0d7d4f88f66..9536e591c72a9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -84,11 +84,8 @@ config PPC_BOOK3S_64 bool "Server processors" select PPC_FPU select PPC_HAVE_PMU_SUPPORT - select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_SPLIT_PMD_PTLOCK - select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select HAVE_MOVE_PMD @@ -108,6 +105,13 @@ config PPC_BOOK3E_64 endchoice +config PPC_THP + def_bool y + depends on PPC_BOOK3S_64 + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + choice prompt "CPU selection" help From 87def77bb5f4b61d47538a34bb2b2cb7db3c037b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Aug 2024 13:29:11 +1000 Subject: [PATCH 51/59] powerpc/mm/64s: Restrict THP to Radix or HPT w/64K pages Transparent hugepages (THP) are not supported when using the Hash Page Table (HPT) MMU with 4K pages. Currently a HPT-only 4K kernel still allows THP to be enabled, which is misleading. Add restrictions to the PPC_THP symbol so that if the kernel is configured with 4K pages and only the HPT MMU (no Radix), then THP is disabled. Note that it's still possible to build a combined Radix/HPT kernel with 4K pages, which does allow THP to be enabled at build time. As such the HPT code still needs to provide some THP related symbols, to allow the build to succeed, but those code paths are never run. 
See the stubs in arch/powerpc/include/asm/book3s/64/hash-4k.h. Signed-off-by: Michael Ellerman Link: https://msgid.link/20240823032911.1238471-2-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9536e591c72a9..1453ccc900c43 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -108,6 +108,7 @@ endchoice config PPC_THP def_bool y depends on PPC_BOOK3S_64 + depends on PPC_RADIX_MMU || (PPC_64S_HASH_MMU && PAGE_SIZE_64KB) select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE From 29dbb984496daa490f3c1181b734b538a6f7b534 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Aug 2024 17:08:30 +1000 Subject: [PATCH 52/59] powerpc/64s: Remove the "fast endian switch" syscall The non-standard "fast endian switch" syscall was added in 2008[1], but was never widely used. It was disabled by default in 2017[2], and there's no evidence it's ever been used since. Remove it entirely. A normal endian switch syscall was added in 2015[3]. [1]: 745a14cc264b ("[POWERPC] Add fast little-endian switch system call") [2]: 727f13616c45 ("powerpc: Disable the fast-endian switch syscall by default") [3]: 529d235a0e19 ("powerpc: Add a proper syscall for switching endianness") Signed-off-by: Michael Ellerman Link: https://msgid.link/20240823070830.1269033-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig.debug | 6 ------ arch/powerpc/kernel/exceptions-64s.S | 17 ----------------- 2 files changed, 23 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3799ceceb04a3..0bbec4afc0d59 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -379,12 +379,6 @@ config FAIL_IOMMU If you are unsure, say N.
-config PPC_FAST_ENDIAN_SWITCH - bool "Deprecated fast endian-switch syscall" - depends on DEBUG_KERNEL && PPC_BOOK3S_64 - help - If you're unsure what this is, say N. - config KASAN_SHADOW_OFFSET hex depends on KASAN diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf2f167c3428..195b075d116cf 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1989,13 +1989,6 @@ INT_DEFINE_END(system_call) INTERRUPT_TO_KERNEL #endif -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -BEGIN_FTR_SECTION - cmpdi r0,0x1ebe - beq- 1f -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) -#endif - /* We reach here with PACA in r13, r13 in r9. */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 @@ -2015,16 +2008,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b system_call_common #endif .endif - -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH - /* Fast LE/BE switch system call */ -1: mfspr r12,SPRN_SRR1 - xori r12,r12,MSR_LE - mtspr SPRN_SRR1,r12 - mr r13,r9 - RFI_TO_USER /* return to userspace */ - b . /* prevent speculative execution */ -#endif .endm EXC_REAL_BEGIN(system_call, 0xc00, 0x100) From 5b4bc44a4854ccd41eef9aa9f47677f7d136c294 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 3 Sep 2024 21:19:51 +1000 Subject: [PATCH 53/59] powerpc: Stop using no_llseek Since commit 868941b14441 ("fs: remove no_llseek"), no_llseek() is simply defined to be NULL, and a NULL llseek means seeking is unsupported. So for statically defined file_operations, such as all these, there's no need or benefit to set llseek = no_llseek. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20240903111951.141376-1-mpe@ellerman.id.au --- arch/powerpc/kernel/eeh.c | 4 ---- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 1 - arch/powerpc/platforms/cell/spufs/file.c | 17 ----------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 1 - arch/powerpc/platforms/pseries/dtl.c | 1 - 5 files changed, 24 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index d03f17987fca7..2f7f0efd564a9 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1682,7 +1682,6 @@ static ssize_t eeh_force_recover_write(struct file *filp, static const struct file_operations eeh_force_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_force_recover_write, }; @@ -1726,7 +1725,6 @@ static ssize_t eeh_dev_check_write(struct file *filp, static const struct file_operations eeh_dev_check_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_check_write, .read = eeh_debugfs_dev_usage, }; @@ -1846,7 +1844,6 @@ static ssize_t eeh_dev_break_write(struct file *filp, static const struct file_operations eeh_dev_break_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_break_write, .read = eeh_debugfs_dev_usage, }; @@ -1893,7 +1890,6 @@ static ssize_t eeh_dev_can_recover(struct file *filp, static const struct file_operations eeh_dev_can_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_can_recover, .read = eeh_debugfs_dev_usage, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 2bd6abcdc113b..1ea591ec60833 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -644,7 +644,6 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file) static const struct file_operations mpc52xx_wdt_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .write = mpc52xx_wdt_write, .unlocked_ioctl = 
mpc52xx_wdt_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 7f4e0db8eb086..d5a2c77bc9087 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -453,7 +453,6 @@ static const struct file_operations spufs_cntl_fops = { .release = spufs_cntl_release, .read = simple_attr_read, .write = simple_attr_write, - .llseek = no_llseek, .mmap = spufs_cntl_mmap, }; @@ -634,7 +633,6 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, - .llseek = no_llseek, }; static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, @@ -664,7 +662,6 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, - .llseek = no_llseek, }; /* low-level ibox access function */ @@ -769,7 +766,6 @@ static const struct file_operations spufs_ibox_fops = { .open = spufs_pipe_open, .read = spufs_ibox_read, .poll = spufs_ibox_poll, - .llseek = no_llseek, }; static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, @@ -797,7 +793,6 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, - .llseek = no_llseek, }; /* low-level mailbox write */ @@ -901,7 +896,6 @@ static const struct file_operations spufs_wbox_fops = { .open = spufs_pipe_open, .write = spufs_wbox_write, .poll = spufs_wbox_poll, - .llseek = no_llseek, }; static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, @@ -929,7 +923,6 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_wbox_stat_fops = { .open = 
spufs_pipe_open, .read = spufs_wbox_stat_read, - .llseek = no_llseek, }; static int spufs_signal1_open(struct inode *inode, struct file *file) @@ -1056,7 +1049,6 @@ static const struct file_operations spufs_signal1_fops = { .read = spufs_signal1_read, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, - .llseek = no_llseek, }; static const struct file_operations spufs_signal1_nosched_fops = { @@ -1064,7 +1056,6 @@ static const struct file_operations spufs_signal1_nosched_fops = { .release = spufs_signal1_release, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, - .llseek = no_llseek, }; static int spufs_signal2_open(struct inode *inode, struct file *file) @@ -1195,7 +1186,6 @@ static const struct file_operations spufs_signal2_fops = { .read = spufs_signal2_read, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, - .llseek = no_llseek, }; static const struct file_operations spufs_signal2_nosched_fops = { @@ -1203,7 +1193,6 @@ static const struct file_operations spufs_signal2_nosched_fops = { .release = spufs_signal2_release, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, - .llseek = no_llseek, }; /* @@ -1343,7 +1332,6 @@ static const struct file_operations spufs_mss_fops = { .open = spufs_mss_open, .release = spufs_mss_release, .mmap = spufs_mss_mmap, - .llseek = no_llseek, }; static vm_fault_t @@ -1401,7 +1389,6 @@ static const struct file_operations spufs_psmap_fops = { .open = spufs_psmap_open, .release = spufs_psmap_release, .mmap = spufs_psmap_mmap, - .llseek = no_llseek, }; @@ -1732,7 +1719,6 @@ static const struct file_operations spufs_mfc_fops = { .flush = spufs_mfc_flush, .fsync = spufs_mfc_fsync, .mmap = spufs_mfc_mmap, - .llseek = no_llseek, }; static int spufs_npc_set(void *data, u64 val) @@ -2102,7 +2088,6 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_dma_info_fops = { .open = spufs_info_open, .read = spufs_dma_info_read, - .llseek = no_llseek, }; 
static void spufs_get_proxydma_info(struct spu_context *ctx, @@ -2159,7 +2144,6 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_proxydma_info_fops = { .open = spufs_info_open, .read = spufs_proxydma_info_read, - .llseek = no_llseek, }; static int spufs_show_tid(struct seq_file *s, void *private) @@ -2442,7 +2426,6 @@ static const struct file_operations spufs_switch_log_fops = { .read = spufs_switch_log_read, .poll = spufs_switch_log_poll, .release = spufs_switch_log_release, - .llseek = no_llseek, }; /** diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 3d072a7455bf6..db3370d1673c3 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -99,7 +99,6 @@ static ssize_t pnv_eeh_ei_write(struct file *filp, static const struct file_operations pnv_eeh_ei_fops = { .open = simple_open, - .llseek = no_llseek, .write = pnv_eeh_ei_write, }; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 3f1cdccebc9c1..8cb9d36ea4915 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -325,7 +325,6 @@ static const struct file_operations dtl_fops = { .open = dtl_file_open, .release = dtl_file_release, .read = dtl_file_read, - .llseek = no_llseek, }; static struct dentry *dtl_dir; From a5a670df1db79f4bb462601aa4cc03caffa761a2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Aug 2024 18:21:01 +1000 Subject: [PATCH 54/59] macintosh/via-pmu: register_pmu_pm_ops() can be __init register_pmu_pm_ops() is only called at init time, via device_initcall(), so can be marked __init. The driver can't be built as a module. 
Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://msgid.link/20240821082101.877438-1-mpe@ellerman.id.au --- drivers/macintosh/via-pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 9d5703b609375..b0f09c70f1ff8 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -2334,7 +2334,7 @@ static const struct platform_suspend_ops pmu_pm_ops = { .valid = pmu_sleep_valid, }; -static int register_pmu_pm_ops(void) +static int __init register_pmu_pm_ops(void) { if (pmu_kind == PMU_OHARE_BASED) powerbook_sleep_init_3400(); From 8c9c01ce695eea84d19482e7429e3d54ceb7585c Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 12 Aug 2024 15:11:52 +0530 Subject: [PATCH 55/59] selftests/powerpc: Allow building without static libc Currently exec_target.c is linked statically with libc, which on Fedora at least requires installing an additional package (glibc-static). If that package is not installed the build fails with: CC exec_target /usr/bin/ld: cannot find -lc: No such file or directory collect2: error: ld returned 1 exit status All exec_target.c does is call sys_exit, which can be done easily enough using inline assembly; doing so removes the requirement for a static libc to be installed.
Suggested-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240812094152.418586-1-maddy@linux.ibm.com --- .../selftests/powerpc/benchmarks/Makefile | 2 +- .../selftests/powerpc/benchmarks/exec_target.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 1321922038d0f..ca4483c238b9f 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -18,4 +18,4 @@ $(OUTPUT)/context_switch: LDLIBS += -lpthread $(OUTPUT)/fork: LDLIBS += -lpthread -$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles +$(OUTPUT)/exec_target: CFLAGS += -nostartfiles diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c index c14b0fc1edde0..a6408d3f26cdd 100644 --- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c @@ -7,10 +7,22 @@ */ #define _GNU_SOURCE -#include <unistd.h> #include <sys/syscall.h> void _start(void) { - syscall(SYS_exit, 0); + asm volatile ( + "li %%r0, %[sys_exit];" + "li %%r3, 0;" + "sc;" + : + : [sys_exit] "i" (SYS_exit) + /* + * "sc" will clobber r0, r3-r13, cr0, ctr, xer and memory. + * Even though sys_exit never returns, handle clobber + * registers. + */ + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "r13", "cr0", "ctr", "xer", "memory" + ); } From b0e2b828dfca645a228f8c89d12fbc2baecfb7ea Mon Sep 17 00:00:00 2001 From: Narayana Murty N Date: Mon, 9 Sep 2024 09:02:20 -0500 Subject: [PATCH 56/59] powerpc/pseries/eeh: Fix pseries_eeh_err_inject VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries because the pseries eeh_ops lack an err_inject implementation. This patch implements pseries_eeh_err_inject() and wires it into the pseries eeh_ops.
It implements support for injecting MMIO load/store errors for testing from user space. The check on the PCI error type (bus type) is moved to platform code, since eeh_pe_inject_err() can be allowed to handle more error types depending on platform requirements. Removal of the check for 'type' in eeh_pe_inject_err() doesn't impact PowerNV as pnv_eeh_err_inject() already has an equivalent check in place. Signed-off-by: Narayana Murty N Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240909140220.529333-1-nnmlinux@linux.ibm.com --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 9 +++-- arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 91a9fd53254fa..5e34611de9ef4 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -308,6 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); +int eeh_pe_inject_mmio_error(struct pci_dev *pdev); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2f7f0efd564a9..0e59b8fd9bc69 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, if (!eeh_ops || !eeh_ops->err_inject) return -ENOENT; - /* Check on PCI error type */ - if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) - return -EINVAL; - /* Check on PCI error function */ if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) return -EINVAL; @@ -1848,6 +1844,11 @@ static const struct file_operations eeh_dev_break_fops = { .read = eeh_debugfs_dev_usage, }; +int eeh_pe_inject_mmio_error(struct pci_dev *pdev) +{ + return eeh_debugfs_break_device(pdev); +} + static ssize_t eeh_dev_can_recover(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b1ae0c0d11878..1893f66371fa4 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -784,6 +784,43 @@ static int pseries_notify_resume(struct eeh_dev *edev) } #endif +/** + * pseries_eeh_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE + */ +static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct eeh_dev *pdev; + + /* Check on PCI error type */ + if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) + return -EINVAL; + + switch (func) { + case EEH_ERR_FUNC_LD_MEM_ADDR: + case EEH_ERR_FUNC_LD_MEM_DATA: + case EEH_ERR_FUNC_ST_MEM_ADDR: + case EEH_ERR_FUNC_ST_MEM_DATA: + /* injects a MMIO error for all pdev's belonging to PE */ + 
pci_lock_rescan_remove(); + list_for_each_entry(pdev, &pe->edevs, entry) + eeh_pe_inject_mmio_error(pdev->pdev); + pci_unlock_rescan_remove(); + break; + default: + return -ERANGE; + } + + return 0; +} + static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .probe = pseries_eeh_probe, @@ -792,7 +829,7 @@ static struct eeh_ops pseries_eeh_ops = { .reset = pseries_eeh_reset, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, - .err_inject = NULL, + .err_inject = pseries_eeh_err_inject, .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, From 01d34cc93639172272c3e47edd5cf1a3ffc6dc7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 9 Sep 2024 15:09:02 +0200 Subject: [PATCH 57/59] powerpc: Switch back to struct platform_driver::remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all powerpc platform drivers to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer.
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://msgid.link/20240909130902.851274-2-u.kleine-koenig@baylibre.com --- arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 2 +- arch/powerpc/platforms/85xx/sgy_cts1000.c | 2 +- arch/powerpc/platforms/pasemi/gpio_mdio.c | 2 +- arch/powerpc/platforms/powernv/opal-prd.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- arch/powerpc/sysdev/fsl_msi.c | 2 +- arch/powerpc/sysdev/pmi.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c index 4a25b6b486158..9668b052cd4b3 100644 --- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -504,7 +504,7 @@ MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match); static struct platform_driver mpc512x_lpbfifo_driver = { .probe = mpc512x_lpbfifo_probe, - .remove_new = mpc512x_lpbfifo_remove, + .remove = mpc512x_lpbfifo_remove, .driver = { .name = DRV_NAME, .of_match_table = mpc512x_lpbfifo_match, diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 34ce21f42623f..e635b27ee7186 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -143,7 +143,7 @@ static struct platform_driver gpio_halt_driver = { .of_match_table = gpio_halt_match, }, .probe = gpio_halt_probe, - .remove_new = gpio_halt_remove, + .remove = gpio_halt_remove, }; module_platform_driver(gpio_halt_driver); diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 4e983af329492..e4538d4712565 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -285,7 +285,7 @@ MODULE_DEVICE_TABLE(of, gpio_mdio_match); static struct platform_driver gpio_mdio_driver = { .probe = gpio_mdio_probe, - .remove_new = gpio_mdio_remove, + .remove = gpio_mdio_remove, .driver 
= { .name = "gpio-mdio-bitbang", .of_match_table = gpio_mdio_match, diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 24f04f20d3e85..dc246ed4b7b4c 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -443,7 +443,7 @@ static struct platform_driver opal_prd_driver = { .of_match_table = opal_prd_match, }, .probe = opal_prd_probe, - .remove_new = opal_prd_remove, + .remove = opal_prd_remove, }; module_platform_driver(opal_prd_driver); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f6a70bc92e835..d95e03b3d3e3f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -1509,7 +1509,7 @@ static const struct of_device_id papr_scm_match[] = { static struct platform_driver papr_scm_driver = { .probe = papr_scm_probe, - .remove_new = papr_scm_remove, + .remove = papr_scm_remove, .driver = { .name = "papr_scm", .of_match_table = papr_scm_match, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index e205135ae1fea..1aa0cb097c9c9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -603,7 +603,7 @@ static struct platform_driver fsl_of_msi_driver = { .of_match_table = fsl_of_msi_ids, }, .probe = fsl_of_msi_probe, - .remove_new = fsl_of_msi_remove, + .remove = fsl_of_msi_remove, }; static __init int fsl_of_msi_init(void) diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 737f97fd67d72..2511e586fe311 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -193,7 +193,7 @@ static void pmi_of_remove(struct platform_device *dev) static struct platform_driver pmi_of_platform_driver = { .probe = pmi_of_probe, - .remove_new = pmi_of_remove, + .remove = pmi_of_remove, .driver = { .name = "pmi", .of_match_table = pmi_match, From b77d36bb9a3de774950ba712a0e47f9d33c6f6d7 Mon Sep 17 
00:00:00 2001 From: Michael Ellerman Date: Tue, 27 Aug 2024 16:36:51 +1000 Subject: [PATCH 58/59] MAINTAINERS: powerpc: Add Maddy Maddy will be helping out with upstream maintenance, add him as a reviewer. Acked-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240827063651.28985-1-mpe@ellerman.id.au --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9ee5195d021c1..fee5ef272986b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12908,6 +12908,7 @@ M: Michael Ellerman R: Nicholas Piggin R: Christophe Leroy R: Naveen N Rao +R: Madhavan Srinivasan L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki From 39190ac7cff1fd15135fa8e658030d9646fdb5f2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Sep 2024 22:05:10 +1000 Subject: [PATCH 59/59] powerpc/atomic: Use YZ constraints for DS-form instructions The 'ld' and 'std' instructions require a 4-byte aligned displacement because they are DS-form instructions. But the "m" asm constraint doesn't enforce that. That can lead to build errors if the compiler chooses a non-aligned displacement, as seen with GCC 14: /tmp/ccuSzwiR.s: Assembler messages: /tmp/ccuSzwiR.s:2579: Error: operand out of domain (39 is not a multiple of 4) make[5]: *** [scripts/Makefile.build:229: net/core/page_pool.o] Error 1 Dumping the generated assembler shows: ld 8,39(8) # MEM[(const struct atomic64_t *)_29].counter, t Use the YZ constraints to tell the compiler either to generate a DS-form displacement, or use an X-form instruction, either of which prevents the build error. See commit 2d43cc701b96 ("powerpc/uaccess: Fix build errors seen with GCC 13/14") for more details on the constraint letters. 
Fixes: 9f0cbea0d8cc ("[POWERPC] Implement atomic{, 64}_{read, write}() without volatile") Cc: stable@vger.kernel.org # v2.6.24+ Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240913125302.0a06b4c7@canb.auug.org.au Tested-by: Mina Almasry Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://msgid.link/20240916120510.2017749-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/asm-compat.h | 6 ++++++ arch/powerpc/include/asm/atomic.h | 5 +++-- arch/powerpc/include/asm/uaccess.h | 7 +------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index b0b209c1df50b..f48e644900a26 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -37,6 +37,12 @@ #define STDX_BE stringify_in_c(stdbrx) #endif +#ifdef CONFIG_CC_IS_CLANG +#define DS_FORM_CONSTRAINT "Z<>" +#else +#define DS_FORM_CONSTRAINT "YZ<>" +#endif + #else /* 32-bit */ /* operations for longs and pointers */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 5bf6a4d49268c..d1ea554c33ed7 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -197,7 +198,7 @@ static __inline__ s64 arch_atomic64_read(const atomic64_t *v) if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED)) __asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : "b"(&v->counter)); else - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : DS_FORM_CONSTRAINT (v->counter)); return t; } @@ -208,7 +209,7 @@ static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i) if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED)) __asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter)); else - __asm__ __volatile__("std%U0%X0 %1,%0" 
: "=m<>"(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=" DS_FORM_CONSTRAINT (v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index fd594bf6c6a9c..4f5a46a77fa2b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __powerpc64__ /* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */ @@ -92,12 +93,6 @@ __pu_failed: \ : label) #endif -#ifdef CONFIG_CC_IS_CLANG -#define DS_FORM_CONSTRAINT "Z<>" -#else -#define DS_FORM_CONSTRAINT "YZ<>" -#endif - #ifdef __powerpc64__ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __put_user_asm2_goto(x, ptr, label) \