From ca1314d73eed493c49bb1932c60a8605530db2e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:40 +0000 Subject: [PATCH 01/12] arm64: syscall: exit userspace before unmasking exceptions In el0_svc_common() we unmask exceptions before we call user_exit(), and so there's a window where an IRQ or debug exception can be taken while RCU is not watching. In do_debug_exception() we account for this in via debug_exception_{enter,exit}(), but in the el1_irq asm we do not and we call trace functions which rely on RCU before we have a guarantee that RCU is watching. Let's avoid this by having el0_svc_common() exit userspace before unmasking exceptions, matching what we do for all other EL0 entry paths. We can use user_exit_irqoff() to avoid the pointless save/restore of IRQ flags while we're sure exceptions are masked in DAIF. The workaround for Cortex-A76 erratum 1463225 may trigger a debug exception before this point, but the debug code invoked in this case is safe even when RCU is not watching. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index e4c0dadf0d92c..13fe79f8e2db1 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,8 +120,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); + user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { /* From 114e0a684753516ef4b71ccb55a8ebcfa8735edb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:41 +0000 Subject: [PATCH 02/12] arm64: mark idle code as noinstr Core code disables RCU when calling arch_cpu_idle(), so it's not safe for arch_cpu_idle() or its calees to be instrumented, as the instrumentation callbacks may attempt to use RCU or other features which are unsafe to use in this context. Mark them noinstr to prevent issues. The use of local_irq_enable() in arch_cpu_idle() is similarly problematic, and the "sched/idle: Fix arch_cpu_idle() vs tracing" patch queued in the tip tree addresses that case. Reported-by: Marco Elver Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a47a40ec6ad91..0c65a91f8f53f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) +static void noinstr __cpu_do_idle(void) { dsb(sy); wfi(); } -static void __cpu_do_idle_irqprio(void) +static void noinstr __cpu_do_idle_irqprio(void) { unsigned long pmr; unsigned long daif_bits; @@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { if (system_uses_irq_prio_masking()) __cpu_do_idle_irqprio(); @@ -119,7 +119,7 @@ void cpu_do_idle(void) /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt From da192676483232a0a9478c89cdddd412e5167470 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:42 +0000 Subject: [PATCH 03/12] arm64: entry: mark entry code as noinstr Functions in entry-common.c are marked as notrace and NOKPROBE_SYMBOL(), but they're still subject to other instrumentation which may rely on lockdep/rcu/context-tracking being up-to-date, and may cause nested exceptions (e.g. for WARN/BUG or KASAN's use of BRK) which will corrupt exceptions registers which have not yet been read. Prevent this by marking all functions in entry-common.c as noinstr to prevent compiler instrumentation. This also blacklists the functions for tracing and kprobes, so we don't need to handle that separately. Functions elsewhere will be dealt with in subsequent patches. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 75 +++++++++++--------------------- 1 file changed, 25 insertions(+), 50 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 43d4c329775f7..75e99161f79ea 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,7 +17,7 @@ #include #include -static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -25,32 +25,28 @@ static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_abort); -static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_pc); -static void notrace el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs) { local_daif_inherit(regs); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el1_undef); -static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); bad_mode(regs, 0, esr); } -NOKPROBE_SYMBOL(el1_inv); -static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -64,16 +60,14 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); } -NOKPROBE_SYMBOL(el1_dbg); -static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el1_fpac); -asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -106,9 +100,8 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) el1_inv(regs, esr); } } -NOKPROBE_SYMBOL(el1_sync_handler); -static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -117,9 +110,8 @@ static void notrace el0_da(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_da); -static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -135,41 +127,36 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_ia); -static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_acc); -static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_sve_acc); -static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_exc); -static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } -NOKPROBE_SYMBOL(el0_sys); -static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -180,41 +167,36 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_pc); -static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } -NOKPROBE_SYMBOL(el0_sp); -static void notrace el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el0_undef); -static void notrace el0_bti(struct pt_regs *regs) +static void noinstr el0_bti(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } -NOKPROBE_SYMBOL(el0_bti); -static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } -NOKPROBE_SYMBOL(el0_inv); -static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -226,26 +208,23 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } -NOKPROBE_SYMBOL(el0_dbg); -static void notrace el0_svc(struct pt_regs *regs) +static void noinstr el0_svc(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc(regs); } -NOKPROBE_SYMBOL(el0_svc); -static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el0_fpac); -asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -297,27 +276,24 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_handler); #ifdef CONFIG_COMPAT -static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } -NOKPROBE_SYMBOL(el0_cp15); -static void notrace el0_svc_compat(struct pt_regs *regs) +static void noinstr el0_svc_compat(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc_compat(regs); } -NOKPROBE_SYMBOL(el0_svc_compat); -asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -360,5 +336,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_compat_handler); #endif /* CONFIG_COMPAT */ From 2f911d494f3f028bbe6346e383a354225682cf1b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:43 +0000 Subject: [PATCH 04/12] arm64: entry: move enter_from_user_mode to entry-common.c In later patches we'll want to extend enter_from_user_mode() and add a corresponding exit_to_user_mode(). As these will be common for all entries/exits from userspace, it'd be better for these to live in entry-common.c with the rest of the entry logic. This patch moves enter_from_user_mode() into entry-common.c. As with other functions in entry-common.c it is marked as noinstr (which prevents all instrumentation, tracing, and kprobes) but there are no other functional changes. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 6 ++++++ arch/arm64/kernel/traps.c | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 75e99161f79ea..9a685e7686fea 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -101,6 +101,12 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) } } +asmlinkage void noinstr enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); +} + static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8af4e0e857366..580c60afc39a9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -876,13 +876,6 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) nmi_exit(); } -asmlinkage void enter_from_user_mode(void) -{ - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); -} -NOKPROBE_SYMBOL(enter_from_user_mode); - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) From 3cb5ed4d76c15fb97c10e5e9f5268d92c68222ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:44 +0000 Subject: [PATCH 05/12] arm64: entry: prepare ret_to_user for function call In a subsequent patch ret_to_user will need to make a C function call (in some configurations) which may clobber x0-x18 at the start of the finish_ret_to_user block, before enable_step_tsk consumes the flags loaded into x1. In preparation for this, let's load the flags into x19, which is preserved across C function calls. This avoids a redundant reload of the flags and ensures we operate on a consistent shapshot regardless. There should be no functional change as a result of this patch. At this point of the entry/exit paths we only need to preserve x28 (tsk) and the sp, and x19 is free for this use. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-6-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b295fb912b12b..84aec600eeed4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -774,13 +774,13 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK + ldr x19, [tsk, #TSK_TI_FLAGS] + and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf - enable_step_tsk x1, x2 + enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif @@ -791,11 +791,12 @@ finish_ret_to_user: */ work_pending: mov x0, sp // 'regs' + mov x1, x19 bl do_notify_resume #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on // enabled while in userspace #endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) From 105fc3352077bba5faaf12cf39f7e3aad26fb70b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:45 +0000 Subject: [PATCH 06/12] arm64: entry: move el1 irq/nmi logic to C In preparation for reworking the EL1 irq/nmi entry code, move the existing logic to C. We no longer need the asm_nmi_enter() and asm_nmi_exit() wrappers, so these are removed. The new C functions are marked noinstr, which prevents compiler instrumentation and runtime probing. In subsequent patches we'll want the new C helpers to be called in all cases, so we don't bother wrapping the calls with ifdeferry. Even when the new C functions are stubs the trivial calls are unlikely to have a measurable impact on the IRQ or NMI paths anyway. Prototypes are added to as otherwise (in some configurations) GCC will complain about the lack of a forward declaration. We already do this for existing function, e.g. enter_from_user_mode(). The new helpers are marked as noinstr (which prevents all instrumentation, tracing, and kprobes). Otherwise, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-7-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 16 ++++++++++++++ arch/arm64/kernel/entry.S | 34 ++++-------------------------- arch/arm64/kernel/irq.c | 15 ------------- 4 files changed, 22 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 99b9383cd036d..d69d53dd7be78 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -31,6 +31,8 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9a685e7686fea..920da254be1da 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,6 +17,22 @@ #include #include +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_enter(); + + trace_hardirqs_off(); +} + +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_exit(); + else + trace_hardirqs_on(); +} + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84aec600eeed4..53e30750fc288 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -637,16 +637,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif + mov x0, sp + bl enter_el1_irq_or_nmi irq_handler @@ -665,26 +657,8 @@ alternative_else_nop_endif 1: #endif -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif + mov x0, sp + bl exit_el1_irq_or_nmi kernel_exit 1 SYM_CODE_END(el1_irq) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9cf2fb87584ae..60456a62da116 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -67,18 +67,3 @@ void __init init_IRQ(void) local_daif_restore(DAIF_PROCCTX_NOIRQ); } } - -/* - * Stubs to make nmi_enter/exit() code callable from ASM - */ -asmlinkage void notrace asm_nmi_enter(void) -{ - nmi_enter(); -} -NOKPROBE_SYMBOL(asm_nmi_enter); - -asmlinkage void notrace asm_nmi_exit(void) -{ - nmi_exit(); -} -NOKPROBE_SYMBOL(asm_nmi_exit); From 23529049c68423820487304f244144e0d576e85a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:46 +0000 Subject: [PATCH 07/12] arm64: entry: fix non-NMI user<->kernel transitions When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE will WARN() at boot time that interrupts are enabled when we call context_tracking_user_enter(), despite the DAIF flags indicating that IRQs are masked. The problem is that we're not tracking IRQ flag changes accurately, and so lockdep believes interrupts are enabled when they are not (and vice-versa). We can shuffle things so to make this more accurate. For kernel->user transitions there are a number of constraints we need to consider: 1) When we call __context_tracking_user_enter() HW IRQs must be disabled and lockdep must be up-to-date with this. 2) Userspace should be treated as having IRQs enabled from the PoV of both lockdep and tracing. 3) As context_tracking_user_enter() stops RCU from watching, we cannot use RCU after calling it. 4) IRQ flag tracing and lockdep have state that must be manipulated before RCU is disabled. ... with similar constraints applying for user->kernel transitions, with the ordering reversed. The generic entry code has enter_from_user_mode() and exit_to_user_mode() helpers to handle this. We can't use those directly, so we add arm64 copies for now (without the instrumentation markers which aren't used on arm64). These replace the existing user_exit() and user_exit_irqoff() calls spread throughout handlers, and the exception unmasking is left as-is. Note that: * The accounting for debug exceptions from userspace now happens in el0_dbg() and ret_to_user(), so this is removed from debug_exception_enter() and debug_exception_exit(). As user_exit_irqoff() wakes RCU, the userspace-specific check is removed. * The accounting for syscalls now happens in el0_svc(), el0_svc_compat(), and ret_to_user(), so this is removed from el0_svc_common(). This does not adversely affect the workaround for erratum 1463225, as this does not depend on any of the state tracking. * In ret_to_user() we mask interrupts with local_daif_mask(), and so we need to inform lockdep and tracing. Here a trace_hardirqs_off() is sufficient and safe as we have not yet exited kernel context and RCU is usable. * As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only needs to check for the latter. * EL0 SError handling will be dealt with in a subsequent patch, as this needs to be treated as an NMI. Prior to this patch, booting an appropriately-configured kernel would result in spats as below: | DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled()) | WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0 | Modules linked in: | CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3 | Hardware name: linux,dummy-virt (DT) | pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--) | pc : check_flags.part.54+0x1dc/0x1f0 | lr : check_flags.part.54+0x1dc/0x1f0 | sp : ffff80001003bd80 | x29: ffff80001003bd80 x28: ffff66ce801e0000 | x27: 00000000ffffffff x26: 00000000000003c0 | x25: 0000000000000000 x24: ffffc31842527258 | x23: ffffc31842491368 x22: ffffc3184282d000 | x21: 0000000000000000 x20: 0000000000000001 | x19: ffffc318432ce000 x18: 0080000000000000 | x17: 0000000000000000 x16: ffffc31840f18a78 | x15: 0000000000000001 x14: ffffc3184285c810 | x13: 0000000000000001 x12: 0000000000000000 | x11: ffffc318415857a0 x10: ffffc318406614c0 | x9 : ffffc318415857a0 x8 : ffffc31841f1d000 | x7 : 647261685f706564 x6 : ffffc3183ff7c66c | x5 : ffff66ce801e0000 x4 : 0000000000000000 | x3 : ffffc3183fe00000 x2 : ffffc31841500000 | x1 : e956dc24146b3500 x0 : 0000000000000000 | Call trace: | check_flags.part.54+0x1dc/0x1f0 | lock_is_held_type+0x10c/0x188 | rcu_read_lock_sched_held+0x70/0x98 | __context_tracking_enter+0x310/0x350 | context_tracking_enter.part.3+0x5c/0xc8 | context_tracking_user_enter+0x6c/0x80 | finish_ret_to_user+0x2c/0x13cr Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/entry-common.c | 40 +++++++++++++++++++----------- arch/arm64/kernel/entry.S | 35 ++++++++++---------------- arch/arm64/kernel/syscall.c | 1 - arch/arm64/mm/fault.c | 22 ++++++++-------- 5 files changed, 51 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69d53dd7be78..d579b2e6db7a7 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 920da254be1da..49d1c1dd9bafa 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) asmlinkage void noinstr enter_from_user_mode(void) { + lockdep_hardirqs_off(CALLER_ADDR0); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); } static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); @@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } @@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } @@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } @@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } @@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } @@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 53e30750fc288..d72c818b019ca 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -748,10 +738,14 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ldr x19, [tsk, #TSK_TI_FLAGS] and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf enable_step_tsk x19, x2 @@ -767,9 +761,6 @@ work_pending: mov x0, sp // 'regs' mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 13fe79f8e2db1..f8f758e4a3064 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); - user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801fa..1f450b784d2c8 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + if (!user_mode(regs)) { + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { /* * We might have interrupted pretty much anything. In * fact, if we're a debug exception, we can even interrupt @@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - if (!user_mode(regs)) - rcu_nmi_exit(); + if (user_mode(regs)) + return; + + rcu_nmi_exit(); if (interrupts_enabled(regs)) trace_hardirqs_on(); From 1ec2f2c05b2ab845d068bff29bd32dbfc6a6ad4c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:47 +0000 Subject: [PATCH 08/12] arm64: ptrace: prepare for EL1 irq/rcu tracking Exceptions from EL1 may be taken when RCU isn't watching (e.g. in idle sequences), or when the lockdep hardirqs transiently out-of-sync with the hardware state (e.g. in the middle of local_irq_enable()). To correctly handle these cases, we'll need to save/restore this state across some exceptions taken from EL1. A series of subsequent patches will update EL1 exception handlers to handle this. In preparation for this, and to avoid dependencies between those patches, this patch adds two new fields to struct pt_regs so that exception handlers can track this state. Note that this is placed in pt_regs as some entry/exit sequences such as el1_irq are invoked from assembly, which makes it very difficult to add a separate structure as with the irqentry_state used by x86. We can separate this once more of the exception logic is moved to C. While the fields only need to be bool, they are both made u64 to keep pt_regs 16-byte aligned. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-9-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd526..28c85b87b8cd4 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -193,6 +193,10 @@ struct pt_regs { /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; }; static inline bool in_syscall(struct pt_regs const *regs) From 7cd1ea1010acbede7eb87b6abb6198921fb36957 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:48 +0000 Subject: [PATCH 09/12] arm64: entry: fix non-NMI kernel<->kernel transitions There are periods in kernel mode when RCU is not watching and/or the scheduler tick is disabled, but we can still take exceptions such as interrupts. The arm64 exception handlers do not account for this, and it's possible that RCU is not watching while an exception handler runs. The x86/generic entry code handles this by ensuring that all (non-NMI) kernel exception handlers call irqentry_enter() and irqentry_exit(), which handle RCU, lockdep, and IRQ flag tracing. We can't yet move to the generic entry code, and already hadnle the user<->kernel transitions elsewhere, so we add new kernel<->kernel transition helpers alog the lines of the generic entry code. Since we now track interrupts becoming masked when an exception is taken, local_daif_inherit() is modified to track interrupts becoming re-enabled when the original context is inherited. To balance the entry/exit paths, each handler masks all DAIF exceptions before exit_to_kernel_mode(). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-10-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/daifflags.h | 3 ++ arch/arm64/kernel/entry-common.c | 67 ++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ec213b4a16502..1c26d7baa67f8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs) { unsigned long flags = regs->pstate & DAIF_MASK; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + /* * We can't use local_daif_restore(regs->pstate) here as * system_has_prio_mask_debugging() won't restore the I bit if it can diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 49d1c1dd9bafa..86622d8dd0d84 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,12 +17,58 @@ #include #include +/* + * This is intended to match the logic in irqentry_enter(), handling the kernel + * mode transitions only. + */ +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + regs->exit_rcu = false; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + trace_hardirqs_off_finish(); + + regs->exit_rcu = true; + return; + } + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); +} + +/* + * This is intended to match the logic in irqentry_exit(), handling the kernel + * mode transitions only, and with preemption handled elsewhere. + */ +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (interrupts_enabled(regs)) { + if (regs->exit_rcu) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + trace_hardirqs_on(); + } else { + if (regs->exit_rcu) + rcu_irq_exit(); + } +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_enter(); - - trace_hardirqs_off(); + else + enter_from_kernel_mode(regs); } asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) @@ -30,36 +76,48 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_exit(); else - trace_hardirqs_on(); + exit_to_kernel_mode(regs); } static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); far = untagged_addr(far); do_mem_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_undef(struct pt_regs *regs) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_undefinstr(regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); bad_mode(regs, 0, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) @@ -79,8 +137,11 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_ptrauth_fault(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) From f0cd5ac1e4c53cb691b3ed3cda1031e1c42153e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:49 +0000 Subject: [PATCH 10/12] arm64: entry: fix NMI {user, kernel}->kernel transitions Exceptions which can be taken at (almost) any time are consdiered to be NMIs. On arm64 that includes: * SDEI events * GICv3 Pseudo-NMIs * Kernel stack overflows * Unexpected/unhandled exceptions ... but currently debug exceptions (BRKs, breakpoints, watchpoints, single-step) are not considered NMIs. As these can be taken at any time, kernel features (lockdep, RCU, ftrace) may not be in a consistent kernel state. For example, we may take an NMI from the idle code or partway through an entry/exit path. While nmi_enter() and nmi_exit() handle most of this state, notably they don't save/restore the lockdep state across an NMI being taken and handled. When interrupts are enabled and an NMI is taken, lockdep may see interrupts become disabled within the NMI code, but not see interrupts become enabled when returning from the NMI, leaving lockdep believing interrupts are disabled when they are actually disabled. The x86 code handles this in idtentry_{enter,exit}_nmi(), which will shortly be moved to the generic entry code. As we can't use either yet, we copy the x86 approach in arm64-specific helpers. All the NMI entrypoints are marked as noinstr to prevent any instrumentation handling code being invoked before the state has been corrected. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-11-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 34 ++++++++++++++++++++++++++++-- arch/arm64/kernel/sdei.c | 7 +++--- arch/arm64/kernel/traps.c | 15 ++++++++----- 4 files changed, 48 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d579b2e6db7a7..0756191f44f64 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -35,6 +35,8 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); asmlinkage void exit_to_user_mode(void); +void arm64_enter_nmi(struct pt_regs *regs); +void arm64_exit_nmi(struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 86622d8dd0d84..6d70be0d3e8fe 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -63,10 +63,40 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) } } +void noinstr arm64_enter_nmi(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); +} + +void noinstr arm64_exit_nmi(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_enter(); + arm64_enter_nmi(regs); else enter_from_kernel_mode(regs); } @@ -74,7 +104,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_exit(); + arm64_exit_nmi(regs); else exit_to_kernel_mode(regs); } diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c4..793c46d6a4479 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, } -asmlinkage __kprobes notrace unsigned long +asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - nmi_enter(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); - nmi_exit(); + arm64_exit_nmi(regs); return ret; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 580c60afc39a9..2059d8f43f55f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr) * bad_mode handles the impossible case in the exception vector. This is always * fatal. */ -asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + arm64_enter_nmi(regs); + console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", @@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void handle_bad_stack(struct pt_regs *regs) +asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) unsigned int esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); + arm64_enter_nmi(regs); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -865,15 +870,15 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + arm64_enter_nmi(regs); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); + arm64_exit_nmi(regs); } /* GENERIC_BUG traps */ From 2a9b3e6ac69a8bf177d8496a11e749e2dc72fa22 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:50 +0000 Subject: [PATCH 11/12] arm64: entry: fix EL1 debug transitions In debug_exception_enter() and debug_exception_exit() we trace hardirqs on/off while RCU isn't guaranteed to be watching, and we don't save and restore the hardirq state, and so may return with this having changed. Handle this appropriately with new entry/exit helpers which do the bare minimum to ensure this is appropriately maintained, without marking debug exceptions as NMIs. These are placed in entry-common.c with the other entry/exit helpers. In future we'll want to reconsider whether some debug exceptions should be NMIs, but this will require a significant refactoring, and for now this should prevent issues with lockdep and RCU. Signed-off-by: Mark Rutland Cc: Catalin Marins Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-12-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 26 ++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 25 ------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6d70be0d3e8fe..70e0a7591245d 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -150,6 +150,30 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -162,7 +186,9 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1f450b784d2c8..795d224f184ff 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,23 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - if (!user_mode(regs)) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -816,14 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (user_mode(regs)) - return; - - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); From 9e5344e0ffc33f4fee899f98b6939a0682b1d9c3 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 30 Nov 2020 17:07:09 +0000 Subject: [PATCH 12/12] arm64: mte: Fix typo in macro definition UL in the definition of SYS_TFSR_EL1_TF1 was misspelled causing compilation issues when trying to implement in kernel MTE async mode. Fix the macro correcting the typo. Note: MTE async mode will be introduced with a future series. Fixes: c058b1c4a5ea ("arm64: mte: system register definitions") Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20201130170709.22309-1-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119cb..c8ab9900293ff 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -983,7 +983,7 @@ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 #define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) -#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31))