From dfbdcda280eb762bae2184145cc0702932d41798 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Sep 2021 10:44:34 +0200 Subject: [PATCH 1/5] gcc-plugins: arm-ssp: Prepare for THREAD_INFO_IN_TASK support We will be enabling THREAD_INFO_IN_TASK support for ARM, which means that we can no longer load the stack canary value by masking the stack pointer and taking the copy that lives in thread_info. Instead, we will be able to load it from the task_struct directly, by using the TPIDRURO register which will hold the current task pointer when THREAD_INFO_IN_TASK is in effect. This is much more straight-forward, and allows us to declutter this code a bit while at it. Note that this means that ARMv6 (non-v6K) SMP systems can no longer use this feature, but those are quite rare to begin with, so this is a reasonable trade off. Reviewed-by: Kees Cook Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Tested-by: Amit Daniel Kachhap --- arch/arm/Kconfig | 2 +- arch/arm/Makefile | 5 +--- arch/arm/include/asm/stackprotector.h | 2 -- arch/arm/include/asm/thread_info.h | 3 --- arch/arm/kernel/asm-offsets.c | 4 --- arch/arm/kernel/process.c | 4 --- scripts/gcc-plugins/arm_ssp_per_task_plugin.c | 27 +++++-------------- 7 files changed, 8 insertions(+), 39 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fc196421b2ced..ff3e64ae959e4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1600,7 +1600,7 @@ config XEN config STACKPROTECTOR_PER_TASK bool "Use a unique stack canary value for each task" - depends on GCC_PLUGINS && STACKPROTECTOR && SMP && !XIP_DEFLATED_DATA + depends on GCC_PLUGINS && STACKPROTECTOR && THREAD_INFO_IN_TASK && !XIP_DEFLATED_DATA select GCC_PLUGIN_ARM_SSP_PER_TASK default y help diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 847c31e7c3687..b46e673a0ebe6 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -273,11 +273,8 @@ ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare stack_protector_prepare: prepare0 $(eval SSP_PLUGIN_CFLAGS := \ - -fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \ - awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\ - include/generated/asm-offsets.h) \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ - awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\ + awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}'\ include/generated/asm-offsets.h)) $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) diff --git a/arch/arm/include/asm/stackprotector.h b/arch/arm/include/asm/stackprotector.h index 72a20c3a0a90b..088d03161be5c 100644 --- a/arch/arm/include/asm/stackprotector.h +++ b/arch/arm/include/asm/stackprotector.h @@ -39,8 +39,6 @@ static __always_inline void boot_init_stack_canary(void) current->stack_canary = canary; #ifndef CONFIG_STACKPROTECTOR_PER_TASK __stack_chk_guard = current->stack_canary; -#else - current_thread_info()->stack_canary = current->stack_canary; #endif } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 9a18da3e10cc3..f0cacc733231f 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -55,9 +55,6 @@ struct thread_info { struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ -#ifdef CONFIG_STACKPROTECTOR_PER_TASK - unsigned long stack_canary; -#endif struct cpu_context_save cpu_context; /* cpu context */ __u32 abi_syscall; /* ABI type and syscall nr */ __u8 used_cp[16]; /* thread used copro */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a646a3f6440f8..9c864ee76107b 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -63,10 +63,6 @@ int main(void) #ifdef CONFIG_IWMMXT DEFINE(TI_IWMMXT_STATE, offsetof(struct thread_info, fpstate.iwmmxt)); #endif -#ifdef CONFIG_STACKPROTECTOR_PER_TASK - DEFINE(TI_STACK_CANARY, offsetof(struct thread_info, stack_canary)); -#endif - DEFINE(THREAD_SZ_ORDER, THREAD_SIZE_ORDER); BLANK(); DEFINE(S_R0, offsetof(struct pt_regs, ARM_r0)); DEFINE(S_R1, offsetof(struct pt_regs, ARM_r1)); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 0e2d3051741ed..cd73c216b2729 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -269,10 +269,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, thread_notify(THREAD_NOTIFY_COPY, thread); -#ifdef CONFIG_STACKPROTECTOR_PER_TASK - thread->stack_canary = p->stack_canary; -#endif - return 0; } diff --git a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c index 8c1af9bdcb1bb..7328d037f9751 100644 --- a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c +++ b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c @@ -4,7 +4,7 @@ __visible int plugin_is_GPL_compatible; -static unsigned int sp_mask, canary_offset; +static unsigned int canary_offset; static unsigned int arm_pertask_ssp_rtl_execute(void) { @@ -13,7 +13,7 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) { const char *sym; rtx body; - rtx mask, masked_sp; + rtx current; /* * Find a SET insn involving a SYMBOL_REF to __stack_chk_guard @@ -30,19 +30,13 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) /* * Replace the source of the SET insn with an expression that - * produces the address of the copy of the stack canary value - * stored in struct thread_info + * produces the address of the current task's stack canary value */ - mask = GEN_INT(sext_hwi(sp_mask, GET_MODE_PRECISION(Pmode))); - masked_sp = gen_reg_rtx(Pmode); + current = gen_reg_rtx(Pmode); - emit_insn_before(gen_rtx_set(masked_sp, - gen_rtx_AND(Pmode, - stack_pointer_rtx, - mask)), - insn); + emit_insn_before(gen_load_tp_hard(current), insn); - SET_SRC(body) = gen_rtx_PLUS(Pmode, masked_sp, + SET_SRC(body) = gen_rtx_PLUS(Pmode, current, GEN_INT(canary_offset)); } return 0; @@ -72,7 +66,6 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const char * const plugin_name = plugin_info->base_name; const int argc = plugin_info->argc; const struct plugin_argument *argv = plugin_info->argv; - int tso = 0; int i; if (!plugin_default_version_check(version, &gcc_version)) { @@ -91,11 +84,6 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, return 1; } - if (!strcmp(argv[i].key, "tso")) { - tso = atoi(argv[i].value); - continue; - } - if (!strcmp(argv[i].key, "offset")) { canary_offset = atoi(argv[i].value); continue; @@ -105,9 +93,6 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, return 1; } - /* create the mask that produces the base of the stack */ - sp_mask = ~((1U << (12 + tso)) - 1); - PASS_INFO(arm_pertask_ssp_rtl, "expand", 1, PASS_POS_INSERT_AFTER); register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, From 19f29aebd929c31c5cc901f38a9295617b602c38 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 18 Sep 2021 10:44:35 +0200 Subject: [PATCH 2/5] ARM: smp: Pass task to secondary_start_kernel This avoids needing to compute the task pointer in this function, which will no longer be possible once we move thread_info off the stack. Signed-off-by: Keith Packard Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Tested-by: Amit Daniel Kachhap --- arch/arm/include/asm/smp.h | 3 ++- arch/arm/kernel/head-nommu.S | 1 + arch/arm/kernel/head.S | 5 +++-- arch/arm/kernel/smp.c | 8 ++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 5d508f5d56c49..f16cbbd5cda44 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -48,7 +48,7 @@ extern void set_smp_ipi_range(int ipi_base, int nr_ipi); * Called from platform specific assembly code, this is the * secondary CPU entry point. */ -asmlinkage void secondary_start_kernel(void); +asmlinkage void secondary_start_kernel(struct task_struct *task); /* @@ -61,6 +61,7 @@ struct secondary_data { }; unsigned long swapper_pg_dir; void *stack; + struct task_struct *task; }; extern struct secondary_data secondary_data; extern void secondary_startup(void); diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 0fc814bbc34b1..fadfee9e2b45e 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -115,6 +115,7 @@ ENTRY(secondary_startup) ret r12 1: bl __after_proc_init ldr sp, [r7, #12] @ set up the stack pointer + ldr r0, [r7, #16] @ set up task pointer mov fp, #0 b secondary_start_kernel ENDPROC(secondary_startup) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 29070eb8df7d9..fa44e2d9f0b04 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -424,8 +424,9 @@ ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) ENTRY(__secondary_switched) - ldr_l r7, secondary_data + 12 @ get secondary_data.stack - mov sp, r7 + adr_l r7, secondary_data + 12 @ get secondary_data.stack + ldr sp, [r7] + ldr r0, [r7, #4] @ get secondary_data.task mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 842427ff2b3cb..8979d548ec178 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -153,6 +153,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif + secondary_data.task = idle; sync_cache_w(&secondary_data); /* @@ -375,9 +376,12 @@ void arch_cpu_idle_dead(void) */ __asm__("mov sp, %0\n" " mov fp, #0\n" + " mov r0, %1\n" " b secondary_start_kernel" : - : "r" (task_stack_page(current) + THREAD_SIZE - 8)); + : "r" (task_stack_page(current) + THREAD_SIZE - 8), + "r" (current) + : "r0"); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -400,7 +404,7 @@ static void smp_store_cpu_info(unsigned int cpuid) * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void secondary_start_kernel(void) +asmlinkage void secondary_start_kernel(struct task_struct *task) { struct mm_struct *mm = &init_mm; unsigned int cpu; From 3855ab614df4818c833864572559a97fd9f9a299 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Sep 2021 10:44:36 +0200 Subject: [PATCH 3/5] ARM: smp: Free up the TLS register while running in the kernel To prepare for a subsequent patch that stores the current task pointer in the user space TLS register while running in the kernel, modify the set_tls and switch_tls routines not to touch the register directly, and update the return to user space code to load the correct value. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Tested-by: Amit Daniel Kachhap --- arch/arm/include/asm/tls.h | 10 +++++++--- arch/arm/kernel/entry-header.S | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 5a66c3b13c92f..c3296499176ce 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -12,8 +12,8 @@ .macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2 mrc p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register - mcr p15, 0, \tp, c13, c0, 3 @ set TLS register - mcr p15, 0, \tpuser, c13, c0, 2 @ and the user r/w register + @ TLS register update is deferred until return to user space + mcr p15, 0, \tpuser, c13, c0, 2 @ set the user r/w register str \tmp2, [\base, #TI_TP_VALUE + 4] @ save it .endm @@ -38,18 +38,22 @@ #ifdef CONFIG_TLS_REG_EMUL #define tls_emu 1 #define has_tls_reg 1 +#define defer_tls_reg_update 0 #define switch_tls switch_tls_none #elif defined(CONFIG_CPU_V6) #define tls_emu 0 #define has_tls_reg (elf_hwcap & HWCAP_TLS) +#define defer_tls_reg_update 0 #define switch_tls switch_tls_v6 #elif defined(CONFIG_CPU_32v6K) #define tls_emu 0 #define has_tls_reg 1 +#define defer_tls_reg_update 1 #define switch_tls switch_tls_v6k #else #define tls_emu 0 #define has_tls_reg 0 +#define defer_tls_reg_update 0 #define switch_tls switch_tls_software #endif @@ -77,7 +81,7 @@ static inline void set_tls(unsigned long val) */ barrier(); - if (!tls_emu) { + if (!tls_emu && !defer_tls_reg_update) { if (has_tls_reg) { asm("mcr p15, 0, %0, c13, c0, 3" : : "r" (val)); diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 40db0f9188b69..ae24dd54e9efb 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -292,6 +292,14 @@ .macro restore_user_regs, fast = 0, offset = 0 +#if defined(CONFIG_CPU_32v6K) && !defined(CONFIG_CPU_V6) + @ The TLS register update is deferred until return to user space so we + @ can use it for other things while running in the kernel + get_thread_info r1 + ldr r1, [r1, #TI_TP_VALUE] + mcr p15, 0, r1, c13, c0, 3 @ set TLS register +#endif + uaccess_enable r1, isb=0 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore From 50596b7559bf226bb35ad55855ee979453ec06a1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Sep 2021 10:44:37 +0200 Subject: [PATCH 4/5] ARM: smp: Store current pointer in TPIDRURO register if available Now that the user space TLS register is assigned on every return to user space, we can use it to keep the 'current' pointer while running in the kernel. This removes the need to access it via thread_info, which is located at the base of the stack, but will be moved out of there in a subsequent patch. Use the __builtin_thread_pointer() helper when available - this will help GCC understand that reloading the value within the same function is not necessary, even when using the per-task stack protector (which also generates accesses via the TLS register). For example, the generated code below loads TPIDRURO only once, and uses it to access both the stack canary and the preempt_count fields. : e92d 41f0 stmdb sp!, {r4, r5, r6, r7, r8, lr} ee1d 4f70 mrc 15, 0, r4, cr13, cr0, {3} 4606 mov r6, r0 b094 sub sp, #80 ; 0x50 f8d4 34e8 ldr.w r3, [r4, #1256] ; 0x4e8 <- stack canary 9313 str r3, [sp, #76] ; 0x4c f8d4 8004 ldr.w r8, [r4, #4] <- preempt count Co-developed-by: Keith Packard Signed-off-by: Keith Packard Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Tested-by: Amit Daniel Kachhap --- arch/arm/Kconfig | 5 +++ arch/arm/Makefile | 4 +++ arch/arm/include/asm/assembler.h | 24 ++++++++++++++ arch/arm/include/asm/current.h | 50 ++++++++++++++++++++++++++++++ arch/arm/include/asm/switch_to.h | 2 ++ arch/arm/include/asm/thread_info.h | 2 ++ arch/arm/kernel/entry-armv.S | 5 +++ arch/arm/kernel/entry-common.S | 1 + arch/arm/kernel/head-common.S | 5 +++ arch/arm/kernel/process.c | 4 +++ arch/arm/kernel/smp.c | 2 ++ arch/arm/mm/proc-macros.S | 3 +- 12 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/current.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ff3e64ae959e4..cd195e6f4ea61 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1157,6 +1157,11 @@ config SMP_ON_UP If you don't know what to do here, say Y. + +config CURRENT_POINTER_IN_TPIDRURO + def_bool y + depends on SMP && CPU_32v6K && !CPU_V6 + config ARM_CPU_TOPOLOGY bool "Support cpu topology definition" depends on SMP && CPU_V7 diff --git a/arch/arm/Makefile b/arch/arm/Makefile index b46e673a0ebe6..1c540157e2831 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -113,6 +113,10 @@ ifeq ($(CONFIG_CC_IS_CLANG),y) CFLAGS_ABI += -meabi gnu endif +ifeq ($(CONFIG_CURRENT_POINTER_IN_TPIDRURO),y) +CFLAGS_ABI += -mtp=cp15 +endif + # Accept old syntax despite ".syntax unified" AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index e2b1fd558bf3c..c1551dee28be5 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -199,6 +199,30 @@ .endm .endr + .macro get_current, rd +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO + mrc p15, 0, \rd, c13, c0, 3 @ get TPIDRURO register +#else + get_thread_info \rd + ldr \rd, [\rd, #TI_TASK] +#endif + .endm + + .macro set_current, rn +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO + mcr p15, 0, \rn, c13, c0, 3 @ set TPIDRURO register +#endif + .endm + + .macro reload_current, t1:req, t2:req +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO + adr_l \t1, __entry_task @ get __entry_task base address + mrc p15, 0, \t2, c13, c0, 4 @ get per-CPU offset + ldr \t1, [\t1, \t2] @ load variable + mcr p15, 0, \t1, c13, c0, 3 @ store in TPIDRURO +#endif + .endm + /* * Get current thread_info. */ diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h new file mode 100644 index 0000000000000..1d472fa7697bd --- /dev/null +++ b/arch/arm/include/asm/current.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Keith Packard + * Copyright (c) 2021 Google, LLC + */ + +#ifndef _ASM_ARM_CURRENT_H +#define _ASM_ARM_CURRENT_H + +#ifndef __ASSEMBLY__ + +struct task_struct; + +static inline void set_current(struct task_struct *cur) +{ + if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO)) + return; + + /* Set TPIDRURO */ + asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory"); +} + +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO + +static inline struct task_struct *get_current(void) +{ + struct task_struct *cur; + +#if __has_builtin(__builtin_thread_pointer) + /* + * Use the __builtin helper when available - this results in better + * code, especially when using GCC in combination with the per-task + * stack protector, as the compiler will recognize that it needs to + * load the TLS register only once in every function. + */ + cur = __builtin_thread_pointer(); +#else + asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(cur)); +#endif + return cur; +} + +#define current get_current() +#else +#include +#endif /* CONFIG_CURRENT_POINTER_IN_TPIDRURO */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ARM_CURRENT_H */ diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index 007d8fea71572..61e4a3c4ca6ec 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -26,6 +26,8 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ + if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO)) \ + __this_cpu_write(__entry_task, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index f0cacc733231f..76b6fbd5540c2 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -29,6 +29,8 @@ struct task_struct; +DECLARE_PER_CPU(struct task_struct *, __entry_task); + #include struct cpu_context_save { diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 241b73d64df73..7263a45abf3d5 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -384,6 +384,8 @@ ENDPROC(__fiq_abt) ATRAP( teq r8, r7) ATRAP( mcrne p15, 0, r8, c1, c0, 0) + reload_current r7, r8 + @ @ Clear FP to mark the first stack frame @ @@ -762,6 +764,8 @@ ENTRY(__switch_to) add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK .endif ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] +#elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) + ldr r7, [r2, #TI_TASK] #endif #ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register @@ -776,6 +780,7 @@ ENTRY(__switch_to) #endif THUMB( mov ip, r4 ) mov r0, r5 + set_current r7 ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously THUMB( ldr sp, [ip], #4 ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index d9c99db50243f..ac86c34682bb5 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -170,6 +170,7 @@ ENTRY(vector_swi) str saved_psr, [sp, #S_PSR] @ Save CPSR str r0, [sp, #S_OLD_R0] @ Save OLD_R0 #endif + reload_current r10, ip zero_fp alignment_trap r10, ip, __cr_alignment asm_trace_hardirqs_on save=0 diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 29b2eda136bba..da18e0a17dc20 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -105,6 +105,11 @@ __mmap_switched: mov r1, #0 bl __memset @ clear .bss +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO + adr_l r0, init_task @ get swapper task_struct + set_current r0 +#endif + ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID str r7, [r1] @ Save machine type diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index cd73c216b2729..30428d756515d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -36,6 +36,10 @@ #include "signal.h" +#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO +DEFINE_PER_CPU(struct task_struct *, __entry_task); +#endif + #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include unsigned long __stack_chk_guard __read_mostly; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8979d548ec178..97ee6b1567e9f 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -409,6 +409,8 @@ asmlinkage void secondary_start_kernel(struct task_struct *task) struct mm_struct *mm = &init_mm; unsigned int cpu; + set_current(task); + secondary_biglittle_init(); /* diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e2c743aa2eb2b..d48ba99d739ca 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -30,8 +30,7 @@ * act_mm - get current->active_mm */ .macro act_mm, rd - get_thread_info \rd - ldr \rd, [\rd, #TI_TASK] + get_current \rd .if (TSK_ACTIVE_MM > IMM12_MASK) add \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK .endif From 18ed1c01a7dd3d7c780b06a49124da237a4c1790 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Sep 2021 10:44:38 +0200 Subject: [PATCH 5/5] ARM: smp: Enable THREAD_INFO_IN_TASK Now that we no longer rely on thread_info living at the base of the task stack to be able to access the 'current' pointer, we can wire up the generic support for moving thread_info into the task struct itself. Note that this requires us to update the cpu field in thread_info explicitly, now that the core code no longer does so. Ideally, we would switch the percpu code to access the cpu field in task_struct instead, but this unleashes #include circular dependency hell. Co-developed-by: Keith Packard Signed-off-by: Keith Packard Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Tested-by: Amit Daniel Kachhap --- arch/arm/Kconfig | 1 + arch/arm/include/asm/assembler.h | 5 +++++ arch/arm/include/asm/switch_to.h | 14 ++++++++++++++ arch/arm/include/asm/thread_info.h | 10 +++++++++- arch/arm/kernel/asm-offsets.c | 2 ++ arch/arm/kernel/entry-armv.S | 2 +- arch/arm/kernel/smp.c | 3 +++ 7 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cd195e6f4ea61..4f61c9789e7f3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -125,6 +125,7 @@ config ARM select PERF_USE_VMALLOC select RTC_LIB select SYS_SUPPORTS_APM_EMULATION + select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index c1551dee28be5..7d23d4bb2168b 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -227,10 +227,15 @@ * Get current thread_info. */ .macro get_thread_info, rd +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* thread_info is the first member of struct task_struct */ + get_current \rd +#else ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT ) THUMB( mov \rd, sp ) THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT ) mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT +#endif .endm /* diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index 61e4a3c4ca6ec..b55c7b2755e49 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -23,9 +23,23 @@ */ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *); +static inline void set_ti_cpu(struct task_struct *p) +{ +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * The core code no longer maintains the thread_info::cpu field once + * CONFIG_THREAD_INFO_IN_TASK is in effect, but we rely on it for + * raw_smp_processor_id(), which cannot access struct task_struct* + * directly for reasons of circular #inclusion hell. + */ + task_thread_info(p)->cpu = task_cpu(p); +#endif +} + #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ + set_ti_cpu(next); \ if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO)) \ __this_cpu_write(__entry_task, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 76b6fbd5540c2..787511396f3f4 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -54,7 +54,9 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ +#ifndef CONFIG_THREAD_INFO_IN_TASK struct task_struct *task; /* main task structure */ +#endif __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ struct cpu_context_save cpu_context; /* cpu context */ @@ -70,11 +72,16 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ + INIT_THREAD_INFO_TASK(tsk) \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define INIT_THREAD_INFO_TASK(tsk) +#else +#define INIT_THREAD_INFO_TASK(tsk) .task = &(tsk), + /* * how to get the thread information struct from C */ @@ -85,6 +92,7 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *) (current_stack_pointer & ~(THREAD_SIZE - 1)); } +#endif #define thread_saved_pc(tsk) \ ((unsigned long)(task_thread_info(tsk)->cpu_context.pc)) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 9c864ee76107b..645845e4982a6 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,7 +43,9 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); +#ifndef CONFIG_THREAD_INFO_IN_TASK DEFINE(TI_TASK, offsetof(struct thread_info, task)); +#endif DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7263a45abf3d5..a54b5044d406d 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -765,7 +765,7 @@ ENTRY(__switch_to) .endif ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) - ldr r7, [r2, #TI_TASK] + mov r7, r2 @ Preserve 'next' #endif #ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 97ee6b1567e9f..cde5b6d8bac5d 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -154,6 +154,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif secondary_data.task = idle; + if (IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK)) + task_thread_info(idle)->cpu = cpu; + sync_cache_w(&secondary_data); /*