[PATCH] annotate arch/x86_64/lib/*.S
Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h wherever possible.

Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Jan Beulich authored and Andi Kleen committed Sep 26, 2006
1 parent fb2e284 commit 8d379da
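
The pattern the patch applies is the same in every file: ENTRY()/ENDPROC() from <linux/linkage.h> replace the open-coded .globl/.p2align/label sequences, and the CFI_* macros from <asm/dwarf2.h> (which expand to the assembler's .cfi_* directives when unwind information is enabled, and to nothing otherwise) describe each stack adjustment and register save. A minimal sketch of that pattern, not part of the patch itself — the function name is illustrative and a kernel build environment providing the two headers is assumed:

#include <linux/linkage.h>
#include <asm/dwarf2.h>

ENTRY(demo_save_rbx)                /* illustrative name; expands to .globl, alignment, label */
	CFI_STARTPROC               /* open the unwind (FDE) region for this function */
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8     /* %rsp dropped by 8, so the CFA is 8 bytes further away */
	CFI_REL_OFFSET rbx, 0       /* the caller's %rbx is now saved at 0(%rsp) */
	/* ... function body ... */
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx             /* %rbx holds the caller's value again */
	ret
	CFI_ENDPROC
ENDPROC(demo_save_rbx)              /* .type/.size bookkeeping */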
Showing 9 changed files with 244 additions and 143 deletions.
47 changes: 28 additions & 19 deletions arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
* Zero a page.
* rdi page
*/
.globl clear_page
.p2align 4
clear_page:
ALIGN
clear_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page)

ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -23,28 +35,25 @@ clear_page:
jnz .Lloop
nop
ret
clear_page_end:
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)

/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad clear_page_c
.byte X86_FEATURE_REP_GOOD
.byte clear_page_end-clear_page
.byte clear_page_c_end-clear_page_c
.previous

.section .altinstr_replacement,"ax"
clear_page_c:
movl $4096/8,%ecx
xorl %eax,%eax
rep
stosq
ret
clear_page_c_end:
.quad clear_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous
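
As the clear_page.S hunks above show, the alternative-instructions handling was reworked so that the replacement code carries unwind information too: the REP STOSQ variant (clear_page_c) is now an ordinary annotated function in .text, and the bytes patched over the start of clear_page are only a two-byte short jump to it. A sketch of that layout with illustrative labels (slow_func/fast_func are not from the patch; the field order mirrors the .altinstructions records in the diff above):

#include <asm/cpufeature.h>

	.text
fast_func:                          /* string-instruction variant; annotated normally */
	/* ... */
	ret

	.p2align 4
slow_func:                          /* default unrolled variant */
	/* ... */
	ret
.Lslow_func_end:

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb                                  /* jmp <disp8> */
	.byte (fast_func - slow_func) - (2f - 1b)   /* rel8: target minus the end of the patched-in jmp */
2:
	.previous

	.section .altinstructions,"a"
	.align 8
	.quad slow_func                     /* site to patch at boot */
	.quad 1b                            /* replacement bytes (the short jmp) */
	.byte X86_FEATURE_REP_GOOD          /* patch only on CPUs with this feature */
	.byte .Lslow_func_end - slow_func   /* bytes available at the patch site */
	.byte 2b - 1b                       /* length of the replacement */
	.previous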
53 changes: 36 additions & 17 deletions arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>

ALIGN
copy_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_c)

/* Don't use streaming store because it's better when the target
ends up in cache. */

/* Could vary the prefetch distance based on SMP/UP */

.globl copy_page
.p2align 4
copy_page:
ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8

movl $(4096/64)-5,%ecx
.p2align 4
@@ -72,30 +88,33 @@ copy_page:
jnz .Loop2

movq (%rsp),%rbx
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)

/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad copy_page
.quad copy_page_c
.byte X86_FEATURE_REP_GOOD
.byte copy_page_c_end-copy_page_c
.byte copy_page_c_end-copy_page_c
.previous

.section .altinstr_replacement,"ax"
copy_page_c:
movl $4096/8,%ecx
rep
movsq
ret
copy_page_c_end:
.quad copy_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous
39 changes: 29 additions & 10 deletions arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
* Functions to copy from and to user space.
*/

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
@@ -12,9 +15,8 @@
#include <asm/cpufeature.h>

/* Standard copy_to_user with segment limit checking */
.globl copy_to_user
.p2align 4
copy_to_user:
ENTRY(copy_to_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
.byte 0xe9 /* 32bit jump */
.long .Lcug-1f
1:
CFI_ENDPROC
ENDPROC(copy_to_user)

.section .altinstr_replacement,"ax"
3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
3: .byte 0xe9 /* replacement jmp with 32 bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
@@ -40,27 +44,31 @@ copy_to_user:
.previous

/* Standard copy_from_user with segment limit checking */
.globl copy_from_user
.p2align 4
copy_from_user:
ENTRY(copy_from_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
/* FALL THROUGH to copy_user_generic */
CFI_ENDPROC
ENDPROC(copy_from_user)

.section .fixup,"ax"
/* must zero dest */
bad_from_user:
CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
CFI_ENDPROC
END(bad_from_user)
.previous


@@ -75,9 +83,8 @@ bad_to_user:
* Output:
* eax uncopied bytes or 0 if successful.
*/
.globl copy_user_generic
.p2align 4
copy_user_generic:
ENTRY(copy_user_generic)
CFI_STARTPROC
.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
.byte 0x66,0x90
1:
@@ -95,6 +102,8 @@ copy_user_generic:
.previous
.Lcug:
pushq %rbx
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbx, 0
xorl %eax,%eax /*zero for the exception handler */

#ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
decl %ecx
jnz .Lloop_1

CFI_REMEMBER_STATE
.Lende:
popq %rbx
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE rbx
ret
CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
/* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
CFI_ENDPROC
ENDPROC(copy_user_generic)


/* Some CPUs run faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
* this please consider this.
*/
copy_user_generic_c:
CFI_STARTPROC
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
ret
3: lea (%rdx,%rcx,8),%rax
ret
CFI_ENDPROC
END(copy_user_generic_c)

.section __ex_table,"a"
.quad 1b,3b
26 changes: 21 additions & 5 deletions arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
* Checksum copy with exception handling.
@@ -53,19 +54,24 @@
.endm


.globl csum_partial_copy_generic
.p2align 4
csum_partial_copy_generic:
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore

.Lignore:
subq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
CFI_REL_OFFSET rbp, 6*8

movq %r8,(%rsp)
movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
addl %ebx,%eax
adcl %r9d,%eax /* carry */

CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
CFI_RESTORE rbx
movq 3*8(%rsp),%r12
CFI_RESTORE r12
movq 4*8(%rsp),%r14
CFI_RESTORE r14
movq 5*8(%rsp),%r13
CFI_RESTORE r13
movq 6*8(%rsp),%rbp
CFI_RESTORE rbp
addq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET -7*8
ret
CFI_RESTORE_STATE

/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
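
Both copy_user.S and csum-copy.S also gain a CFI_REMEMBER_STATE / CFI_RESTORE_STATE pair around the shared epilogue: the exception-handling tails placed after the ret jump back to .Lende while the callee-saved registers are still on the stack, so the unwind state that was in effect before the epilogue has to be reinstated for them. A minimal sketch of that idea with an illustrative function, not taken from the patch:

ENTRY(demo_with_fixup)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	/* ... body; a fault branches to .Ldemo_error ... */

	CFI_REMEMBER_STATE          /* snapshot: %rbx still saved, CFA = %rsp + 16 */
.Ldemo_exit:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE           /* the code below runs before the epilogue, not after it */

.Ldemo_error:
	movq $-1,%rax               /* illustrative error value */
	jmp .Ldemo_exit
	CFI_ENDPROC
ENDPROC(demo_with_fixup)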