From e4160bbb43ff9bffcd01fcc60feebd35391b8bbf Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Thu, 23 Jun 2011 17:07:40 +0100
Subject: [PATCH]
--- yaml ---
r: 258267
b: refs/heads/master
c: 66a625a88174664fc252d7b1ac1773e76e52326b
h: refs/heads/master
i:
  258265: f44fa88f30977be7b93c82eedf3277674914013e
  258263: dda7d6a9be3690bb8bd39eca610bde8cd5d8d2be
v: v3
---
 [refs]                                          |   2 +-
 .../Documentation/arm/kernel_user_helpers.txt   | 267 ------------------
 trunk/arch/arm/kernel/entry-armv.S              | 246 ++++++++++------
 trunk/arch/arm/mm/proc-macros.S                 |  63 +++++
 4 files changed, 216 insertions(+), 362 deletions(-)
 delete mode 100644 trunk/Documentation/arm/kernel_user_helpers.txt

diff --git a/[refs] b/[refs]
index 0b68dcb5e53c..9523e9eb82e5 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 2ff0720933f9a5e502a46fb6ea14ceeaad65131f
+refs/heads/master: 66a625a88174664fc252d7b1ac1773e76e52326b
diff --git a/trunk/Documentation/arm/kernel_user_helpers.txt b/trunk/Documentation/arm/kernel_user_helpers.txt
deleted file mode 100644
index a17df9f91d16..000000000000
--- a/trunk/Documentation/arm/kernel_user_helpers.txt
+++ /dev/null
@@ -1,267 +0,0 @@
-Kernel-provided User Helpers
-============================
-
-These are segments of kernel-provided user code reachable from user space
-at a fixed address in kernel memory.  This is used to provide user space
-with some operations which require kernel help because of unimplemented
-native features and/or instructions in many ARM CPUs.  The idea is for
-this code to be executed directly in user mode for best efficiency, but
-it is too intimate with the kernel counterpart to be left to user
-libraries.  In fact this code might even differ from one CPU to another
-depending on the available instruction set, or whether it is an SMP
-system.  In other words, the kernel reserves the right to change this
-code as needed without warning.  Only the entry points and their results
-as documented here are guaranteed to be stable.
-
-This is different from (but doesn't preclude) a full-blown VDSO
-implementation; however, a VDSO would prevent some assembly tricks with
-constants that allow for efficient branching to those code segments.  And
-since those code segments only use a few cycles before returning to user
-code, the indirect far call of a VDSO would add a measurable overhead to
-such minimalistic operations.
-
-User space is expected to bypass those helpers and implement those things
-inline (either in the code emitted directly by the compiler, or as part of
-the implementation of a library call) when optimizing for a recent enough
-processor that has the necessary native support, but only if the resulting
-binaries are already going to be incompatible with earlier ARM processors
-due to usage of similar native instructions for other things.  In other
-words, don't make binaries unable to run on earlier processors just for
-the sake of not using these kernel helpers if your compiled code is not
-going to use new instructions for other purposes.
-
-New helpers may be added over time, so an older kernel may be missing some
-helpers present in a newer kernel.  For this reason, programs must check
-the value of __kuser_helper_version (see below) before assuming that it is
-safe to call any particular helper.  This check should ideally be
-performed only once at process startup time, and execution aborted early
-if the required helpers are not provided by the kernel version the
-process is running on.
-
-kuser_helper_version
---------------------
-
-Location: 0xffff0ffc
-
-Reference declaration:
-
-  extern int32_t __kuser_helper_version;
-
-Definition:
-
-  This field contains the number of helpers being implemented by the
-  running kernel.  User space may read this to determine the availability
-  of a particular helper.
-
-Usage example:
-
-#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
-
-void check_kuser_version(void)
-{
-	if (__kuser_helper_version < 2) {
-		fprintf(stderr, "can't do atomic operations, kernel too old\n");
-		abort();
-	}
-}
-
-Notes:
-
-  User space may assume that the value of this field never changes
-  during the lifetime of any single process.  This means that this
-  field can be read once during the initialisation of a library or
-  startup phase of a program.
-
-kuser_get_tls
--------------
-
-Location: 0xffff0fe0
-
-Reference prototype:
-
-  void * __kuser_get_tls(void);
-
-Input:
-
-	lr = return address
-
-Output:
-
-	r0 = TLS value
-
-Clobbered registers:
-
-	none
-
-Definition:
-
-  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
-
-Usage example:
-
-typedef void * (__kuser_get_tls_t)(void);
-#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
-
-void foo()
-{
-	void *tls = __kuser_get_tls();
-	printf("TLS = %p\n", tls);
-}
-
-Notes:
-
-  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
-
-kuser_cmpxchg
--------------
-
-Location: 0xffff0fc0
-
-Reference prototype:
-
-  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
-
-Input:
-
-	r0 = oldval
-	r1 = newval
-	r2 = ptr
-	lr = return address
-
-Output:
-
-	r0 = success code (zero or non-zero)
-	C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
-	r3, ip, flags
-
-Definition:
-
-  Atomically store newval in *ptr only if *ptr is equal to oldval.
-  Return zero if *ptr was changed or non-zero if no exchange happened.
-  The C flag is also set if *ptr was changed to allow for assembly
-  optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
-#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
-
-int atomic_add(volatile int *ptr, int val)
-{
-	int old, new;
-
-	do {
-		old = *ptr;
-		new = old + val;
-	} while(__kuser_cmpxchg(old, new, ptr));
-
-	return new;
-}
-
-Notes:
-
-  - This routine already includes memory barriers as needed.
-
-  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
-
-kuser_memory_barrier
---------------------
-
-Location: 0xffff0fa0
-
-Reference prototype:
-
-  void __kuser_memory_barrier(void);
-
-Input:
-
-	lr = return address
-
-Output:
-
-	none
-
-Clobbered registers:
-
-	none
-
-Definition:
-
-  Apply any needed memory barrier to preserve consistency with data modified
-  manually and __kuser_cmpxchg usage.
-
-Usage example:
-
-typedef void (__kuser_dmb_t)(void);
-#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
-
-Notes:
-
-  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
-
-kuser_cmpxchg64
----------------
-
-Location: 0xffff0f60
-
-Reference prototype:
-
-  int __kuser_cmpxchg64(const int64_t *oldval,
-                        const int64_t *newval,
-                        volatile int64_t *ptr);
-
-Input:
-
-	r0 = pointer to oldval
-	r1 = pointer to newval
-	r2 = pointer to target value
-	lr = return address
-
-Output:
-
-	r0 = success code (zero or non-zero)
-	C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
-	r3, lr, flags
-
-Definition:
-
-  Atomically store the 64-bit value pointed to by newval in *ptr only if
-  *ptr is equal to the 64-bit value pointed to by oldval.  Return zero if
-  *ptr was changed or non-zero if no exchange happened.
-
-  The C flag is also set if *ptr was changed to allow for assembly
-  optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
-                                  const int64_t *newval,
-                                  volatile int64_t *ptr);
-#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
-
-int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
-{
-	int64_t old, new;
-
-	do {
-		old = *ptr;
-		new = old + val;
-	} while(__kuser_cmpxchg64(&old, &new, ptr));
-
-	return new;
-}
-
-Notes:
-
-  - This routine already includes memory barriers as needed.
-
-  - Due to the length of this sequence, this spans 2 conventional kuser
-    "slots", therefore 0xffff0f80 is not used as a valid entry point.
-
-  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/trunk/arch/arm/kernel/entry-armv.S b/trunk/arch/arm/kernel/entry-armv.S
index 8f9ab5c9015b..90c62cd51ca9 100644
--- a/trunk/arch/arm/kernel/entry-armv.S
+++ b/trunk/arch/arm/kernel/entry-armv.S
@@ -383,7 +383,7 @@ ENDPROC(__pabt_svc)
 	.endm
 
 	.macro	kuser_cmpxchg_check
-#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
 #ifndef CONFIG_MMU
 #warning "NPTL on non MMU needs fixing"
 #else
@@ -392,7 +392,7 @@ ENDPROC(__pabt_svc)
 	@ perform a quick test inline since it should be false
 	@ 99.9999% of the time.  The rest is done out of line.
 	cmp	r2, #TASK_SIZE
-	blhs	kuser_cmpxchg64_fixup
+	blhs	kuser_cmpxchg_fixup
 #endif
 #endif
 	.endm
@@ -758,12 +758,31 @@ ENDPROC(__switch_to)
 /*
  * User helpers.
  *
+ * These are segments of kernel-provided user code reachable from user
+ * space at a fixed address in kernel memory.  This is used to provide
+ * user space with some operations which require kernel help because of
+ * unimplemented native features and/or instructions in many ARM CPUs.
+ * The idea is for this code to be executed directly in user mode for
+ * best efficiency, but it is too intimate with the kernel counterpart
+ * to be left to user libraries.  In fact this code might even differ
+ * from one CPU to another depending on the available instruction set
+ * and restrictions like on SMP systems.  In other words, the kernel
+ * reserves the right to change this code as needed without warning.
+ * Only the entry points and their results are guaranteed to be stable.
+ *
  * Each segment is 32-byte aligned and will be moved to the top of the high
  * vector page.  New segments (if ever needed) must be added in front of
  * existing ones.  This mechanism should be used only for things that are
 * really small and justified, and not be abused freely.
 *
- * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ * User space is expected to implement those things inline when optimizing
+ * for a processor that has the necessary native support, but only if such
+ * resulting binaries are already going to be incompatible with earlier ARM
+ * processors due to the use of unsupported instructions other than what
+ * is provided here.  In other words, don't make binaries unable to run on
+ * earlier processors just for the sake of not using these kernel helpers
+ * if your compiled code is not going to use the new instructions for
+ * other purposes.
 */
 
 	THUMB(	.arm	)
@@ -780,104 +799,97 @@ ENDPROC(__switch_to)
 __kuser_helper_start:
 
 /*
- * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
- * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
+ * Reference prototype:
+ *
+ *	void __kernel_memory_barrier(void)
+ *
+ * Input:
+ *
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	none
+ *
+ * Clobbered:
+ *
+ *	none
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef void (__kernel_dmb_t)(void);
+ *	#define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
+ *
+ * Apply any needed memory barrier to preserve consistency with data modified
+ * manually and __kuser_cmpxchg usage.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_dmb() \
+ *         asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \
+ *	        : : : "r0", "lr","cc" )
 */
-__kuser_cmpxchg64:				@ 0xffff0f60
-
-#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
-
-	/*
-	 * Poor you.  No fast solution possible...
-	 * The kernel itself must perform the operation.
-	 * A special ghost syscall is used for that (see traps.c).
-	 */
-	stmfd	sp!, {r7, lr}
-	ldr	r7, 1f			@ it's 20 bits
-	swi	__ARM_NR_cmpxchg64
-	ldmfd	sp!, {r7, pc}
-1:	.word	__ARM_NR_cmpxchg64
-
-#elif defined(CONFIG_CPU_32v6K)
-
-	stmfd	sp!, {r4, r5, r6, r7}
-	ldrd	r4, r5, [r0]			@ load old val
-	ldrd	r6, r7, [r1]			@ load new val
-	smp_dmb	arm
-1:	ldrexd	r0, r1, [r2]			@ load current val
-	eors	r3, r0, r4			@ compare with oldval (1)
-	eoreqs	r3, r1, r5			@ compare with oldval (2)
-	strexdeq r3, r6, r7, [r2]		@ store newval if eq
-	teqeq	r3, #1				@ success?
-	beq	1b				@ if no then retry
-	smp_dmb	arm
-	rsbs	r0, r3, #0			@ set returned val and C flag
-	ldmfd	sp!, {r4, r5, r6, r7}
-	bx	lr
-
-#elif !defined(CONFIG_SMP)
-
-#ifdef CONFIG_MMU
-
-	/*
-	 * The only thing that can break atomicity in this cmpxchg64
-	 * implementation is either an IRQ or a data abort exception
-	 * causing another process/thread to be scheduled in the middle of
-	 * the critical sequence.  The same strategy as for cmpxchg is used.
-	 */
-	stmfd	sp!, {r4, r5, r6, lr}
-	ldmia	r0, {r4, r5}			@ load old val
-	ldmia	r1, {r6, lr}			@ load new val
-1:	ldmia	r2, {r0, r1}			@ load current val
-	eors	r3, r0, r4			@ compare with oldval (1)
-	eoreqs	r3, r1, r5			@ compare with oldval (2)
-2:	stmeqia	r2, {r6, lr}			@ store newval if eq
-	rsbs	r0, r3, #0			@ set return val and C flag
-	ldmfd	sp!, {r4, r5, r6, pc}
-
-	.text
-kuser_cmpxchg64_fixup:
-	@ Called from kuser_cmpxchg_fixup.
-	@ r2 = address of interrupted insn (must be preserved).
-	@ sp = saved regs.  r7 and r8 are clobbered.
-	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
-	mov	r7, #0xffff0fff
-	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
-	subs	r8, r2, r7
-	rsbcss	r8, r8, #(2b - 1b)
-	strcs	r7, [sp, #S_PC]
-#if __LINUX_ARM_ARCH__ < 6
-	bcc	kuser_cmpxchg32_fixup
-#endif
-	mov	pc, lr
-	.previous
-
-#else
-#warning "NPTL on non MMU needs fixing"
-	mov	r0, #-1
-	adds	r0, r0, #0
-	usr_ret	lr
-#endif
-
-#else
-#error "incoherent kernel configuration"
-#endif
-
-	/* pad to next slot */
-	.rept	(16 - (. - __kuser_cmpxchg64)/4)
-	.word	0
-	.endr
-
-	.align	5
-
 __kuser_memory_barrier:				@ 0xffff0fa0
 	smp_dmb	arm
 	usr_ret	lr
 
 	.align	5
 
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_cmpxchg(int oldval, int newval, int *ptr)
+ *
+ * Input:
+ *
+ *	r0 = oldval
+ *	r1 = newval
+ *	r2 = ptr
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = returned value (zero or non-zero)
+ *	C flag = set if r0 == 0, clear if r0 != 0
+ *
+ * Clobbered:
+ *
+ *	r3, ip, flags
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
+ *	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+ *
+ * Atomically store newval in *ptr if *ptr is equal to oldval, on behalf
+ * of user space.  Return zero if *ptr was changed or non-zero if no
+ * exchange happened.  The C flag is also set if *ptr was changed to
+ * allow for assembly optimization in the calling code.
+ *
+ * Notes:
+ *
+ *    - This routine already includes memory barriers as needed.
+ *
+ * For example, a user space atomic_add implementation could look like this:
+ *
+ * #define atomic_add(ptr, val) \
+ *	({ register unsigned int *__ptr asm("r2") = (ptr); \
+ *	   register unsigned int __result asm("r1"); \
+ *	   asm volatile ( \
+ *	       "1: @ atomic_add\n\t" \
+ *	       "ldr	r0, [r2]\n\t" \
+ *	       "mov	r3, #0xffff0fff\n\t" \
+ *	       "add	lr, pc, #4\n\t" \
+ *	       "add	r1, r0, %2\n\t" \
+ *	       "add	pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ *	       "bcc	1b" \
+ *	       : "=&r" (__result) \
+ *	       : "r" (__ptr), "rIL" (val) \
+ *	       : "r0","r3","ip","lr","cc","memory" ); \
+ *	   __result; })
+ */
+
 __kuser_cmpxchg:				@ 0xffff0fc0
 
 #if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
@@ -913,7 +925,7 @@ __kuser_cmpxchg:			@ 0xffff0fc0
 	usr_ret	lr
 
 	.text
-kuser_cmpxchg32_fixup:
+kuser_cmpxchg_fixup:
 	@ Called from kuser_cmpxchg_check macro.
 	@ r2 = address of interrupted insn (must be preserved).
 	@ sp = saved regs.  r7 and r8 are clobbered.
@@ -951,6 +963,39 @@ kuser_cmpxchg32_fixup:
 
 	.align	5
 
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_get_tls(void)
+ *
+ * Input:
+ *
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = TLS value
+ *
+ * Clobbered:
+ *
+ *	none
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_get_tls_t)(void);
+ *	#define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
+ *
+ * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_get_tls() \
+ *	({ register unsigned int __val asm("r0"); \
+ *	   asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
+ *	        : "=r" (__val) : : "lr","cc" ); \
+ *	   __val; })
+ */
+
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
@@ -959,6 +1004,19 @@ __kuser_get_tls:			@ 0xffff0fe0
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 
+/*
+ * Reference declaration:
+ *
+ *	extern unsigned int __kernel_helper_version;
+ *
+ * Definition and user space usage example:
+ *
+ *	#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+ *
+ * User space may read this to determine the current number of helpers
+ * available.
+ */
+
 __kuser_helper_version:				@ 0xffff0ffc
 	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
 
diff --git a/trunk/arch/arm/mm/proc-macros.S b/trunk/arch/arm/mm/proc-macros.S
index 34261f9486b9..4ae9b4407074 100644
--- a/trunk/arch/arm/mm/proc-macros.S
+++ b/trunk/arch/arm/mm/proc-macros.S
@@ -254,3 +254,66 @@
 	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
 	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
 	.endm
+
+.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
+	.type	\name\()_processor_functions, #object
+	.align 2
+ENTRY(\name\()_processor_functions)
+	.word	\dabort
+	.word	\pabort
+	.word	cpu_\name\()_proc_init
+	.word	cpu_\name\()_proc_fin
+	.word	cpu_\name\()_reset
+	.word	cpu_\name\()_do_idle
+	.word	cpu_\name\()_dcache_clean_area
+	.word	cpu_\name\()_switch_mm
+
+	.if \nommu
+	.word	0
+	.else
+	.word	cpu_\name\()_set_pte_ext
+	.endif
+
+	.if \suspend
+	.word	cpu_\name\()_suspend_size
+	.word	cpu_\name\()_do_suspend
+	.word	cpu_\name\()_do_resume
+	.else
+	.word	0
+	.word	0
+	.word	0
+	.endif
+
+	.size	\name\()_processor_functions, . - \name\()_processor_functions
+.endm
+
+.macro define_cache_functions name:req
+	.align 2
+	.type	\name\()_cache_fns, #object
+ENTRY(\name\()_cache_fns)
+	.long	\name\()_flush_icache_all
+	.long	\name\()_flush_kern_cache_all
+	.long	\name\()_flush_user_cache_all
+	.long	\name\()_flush_user_cache_range
+	.long	\name\()_coherent_kern_range
+	.long	\name\()_coherent_user_range
+	.long	\name\()_flush_kern_dcache_area
+	.long	\name\()_dma_map_area
+	.long	\name\()_dma_unmap_area
+	.long	\name\()_dma_flush_range
+	.size	\name\()_cache_fns, . - \name\()_cache_fns
+.endm
+
+.macro define_tlb_functions name:req, flags_up:req, flags_smp
+	.type	\name\()_tlb_fns, #object
+ENTRY(\name\()_tlb_fns)
+	.long	\name\()_flush_user_tlb_range
+	.long	\name\()_flush_kern_tlb_range
+	.ifnb \flags_smp
+		ALT_SMP(.long	\flags_smp )
+		ALT_UP(.long	\flags_up )
+	.else
+		.long	\flags_up
+	.endif
+	.size	\name\()_tlb_fns, . - \name\()_tlb_fns
+.endm
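
Usage note: the define_processor_functions, define_cache_functions and
define_tlb_functions macros above only emit the function tables; each CPU's
proc-*.S file is expected to invoke them.  As a minimal sketch of such an
invocation (the v7/v7wbi symbol names below are illustrative assumptions for
a hypothetical ARMv7 implementation, not part of this patch):

	@ hypothetical arch/arm/mm/proc-v7.S fragment
	@ build v7_processor_functions: abort entries first, then the
	@ cpu_v7_* methods, with the suspend/resume slots filled in
	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1

	@ emit the cache function vector; this expects v7_flush_icache_all,
	@ v7_flush_kern_cache_all, etc. to be defined elsewhere
	define_cache_functions v7

	@ TLB function vector with separate UP and SMP flag words,
	@ selected at boot via the ALT_SMP/ALT_UP fixups
	define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp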