[PATCH] annotate arch/x86_64/lib/*.S
Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h wherever possible.

Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Jan Beulich authored and Andi Kleen committed Sep 26, 2006
1 parent fb2e284 commit 8d379da
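
The pattern the patch applies is the same in every file: ENTRY()/ENDPROC() from <linux/linkage.h> replace the open-coded .globl/.p2align/label sequences, and the CFI_* macros from <asm/dwarf2.h> (which expand to the assembler's .cfi_* directives when unwind information is enabled, and to nothing otherwise) describe each stack adjustment and register save. A minimal sketch of that pattern, not part of the patch itself — the function name is illustrative and a kernel build environment providing the two headers is assumed:

#include <linux/linkage.h>
#include <asm/dwarf2.h>

ENTRY(demo_save_rbx)                /* illustrative name; expands to .globl, alignment, label */
	CFI_STARTPROC               /* open the unwind (FDE) region for this function */
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8     /* %rsp dropped by 8, so the CFA is 8 bytes further away */
	CFI_REL_OFFSET rbx, 0       /* the caller's %rbx is now saved at 0(%rsp) */
	/* ... function body ... */
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx             /* %rbx holds the caller's value again */
	ret
	CFI_ENDPROC
ENDPROC(demo_save_rbx)              /* .type/.size bookkeeping */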
Showing 9 changed files with 244 additions and 143 deletions.
47 changes: 28 additions & 19 deletions arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
* Zero a page.
* rdi page
*/
.globl clear_page
.p2align 4
clear_page:
ALIGN
clear_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page)

ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -23,28 +35,25 @@ clear_page:
jnz .Lloop
nop
ret
clear_page_end:
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)

/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad clear_page_c
.byte X86_FEATURE_REP_GOOD
.byte clear_page_end-clear_page
.byte clear_page_c_end-clear_page_c
.previous

.section .altinstr_replacement,"ax"
clear_page_c:
movl $4096/8,%ecx
xorl %eax,%eax
rep
stosq
ret
clear_page_c_end:
.quad clear_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous
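
As the clear_page.S hunks above show, the alternative-instructions handling was reworked so that the replacement code carries unwind information too: the REP STOSQ variant (clear_page_c) is now an ordinary annotated function in .text, and the bytes patched over the start of clear_page are only a two-byte short jump to it. A sketch of that layout with illustrative labels (slow_func/fast_func are not from the patch; the field order mirrors the .altinstructions records in the diff above):

#include <asm/cpufeature.h>

	.text
fast_func:                          /* string-instruction variant; annotated normally */
	/* ... */
	ret

	.p2align 4
slow_func:                          /* default unrolled variant */
	/* ... */
	ret
.Lslow_func_end:

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb                                  /* jmp <disp8> */
	.byte (fast_func - slow_func) - (2f - 1b)   /* rel8: target minus the end of the patched-in jmp */
2:
	.previous

	.section .altinstructions,"a"
	.align 8
	.quad slow_func                     /* site to patch at boot */
	.quad 1b                            /* replacement bytes (the short jmp) */
	.byte X86_FEATURE_REP_GOOD          /* patch only on CPUs with this feature */
	.byte .Lslow_func_end - slow_func   /* bytes available at the patch site */
	.byte 2b - 1b                       /* length of the replacement */
	.previous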
53 changes: 36 additions & 17 deletions arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>

ALIGN
copy_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_c)

/* Don't use streaming store because it's better when the target
ends up in cache. */

/* Could vary the prefetch distance based on SMP/UP */

.globl copy_page
.p2align 4
copy_page:
ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8

movl $(4096/64)-5,%ecx
.p2align 4
@@ -72,30 +88,33 @@ copy_page:
jnz .Loop2

movq (%rsp),%rbx
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)

/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad copy_page
.quad copy_page_c
.byte X86_FEATURE_REP_GOOD
.byte copy_page_c_end-copy_page_c
.byte copy_page_c_end-copy_page_c
.previous

.section .altinstr_replacement,"ax"
copy_page_c:
movl $4096/8,%ecx
rep
movsq
ret
copy_page_c_end:
.quad copy_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous
39 changes: 29 additions & 10 deletions arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
* Functions to copy from and to user space.
*/

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
@@ -12,9 +15,8 @@
#include <asm/cpufeature.h>

/* Standard copy_to_user with segment limit checking */
.globl copy_to_user
.p2align 4
copy_to_user:
ENTRY(copy_to_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
.byte 0xe9 /* 32bit jump */
.long .Lcug-1f
1:
CFI_ENDPROC
ENDPROC(copy_to_user)

.section .altinstr_replacement,"ax"
3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
3: .byte 0xe9 /* replacement jmp with 32 bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
@@ -40,27 +44,31 @@ copy_to_user:
.previous

/* Standard copy_from_user with segment limit checking */
.globl copy_from_user
.p2align 4
copy_from_user:
ENTRY(copy_from_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
/* FALL THROUGH to copy_user_generic */
CFI_ENDPROC
ENDPROC(copy_from_user)

.section .fixup,"ax"
/* must zero dest */
bad_from_user:
CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
CFI_ENDPROC
END(bad_from_user)
.previous


@@ -75,9 +83,8 @@ bad_to_user:
* Output:
* eax uncopied bytes or 0 if successful.
*/
.globl copy_user_generic
.p2align 4
copy_user_generic:
ENTRY(copy_user_generic)
CFI_STARTPROC
.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
.byte 0x66,0x90
1:
@@ -95,6 +102,8 @@ copy_user_generic:
.previous
.Lcug:
pushq %rbx
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbx, 0
xorl %eax,%eax /*zero for the exception handler */

#ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
decl %ecx
jnz .Lloop_1

CFI_REMEMBER_STATE
.Lende:
popq %rbx
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE rbx
ret
CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
/* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
CFI_ENDPROC
ENDPROC(copy_user_generic)


/* Some CPUs run faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
* this please consider this.
*/
copy_user_generic_c:
CFI_STARTPROC
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
ret
3: lea (%rdx,%rcx,8),%rax
ret
CFI_ENDPROC
END(copy_user_generic_c)

.section __ex_table,"a"
.quad 1b,3b
26 changes: 21 additions & 5 deletions arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
* Checksum copy with exception handling.
@@ -53,19 +54,24 @@
.endm


.globl csum_partial_copy_generic
.p2align 4
csum_partial_copy_generic:
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore

.Lignore:
subq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
CFI_REL_OFFSET rbp, 6*8

movq %r8,(%rsp)
movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
addl %ebx,%eax
adcl %r9d,%eax /* carry */

CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
CFI_RESTORE rbx
movq 3*8(%rsp),%r12
CFI_RESTORE r12
movq 4*8(%rsp),%r14
CFI_RESTORE r14
movq 5*8(%rsp),%r13
CFI_RESTORE r13
movq 6*8(%rsp),%rbp
CFI_RESTORE rbp
addq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET -7*8
ret
CFI_RESTORE_STATE

/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
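
Both copy_user.S and csum-copy.S also gain a CFI_REMEMBER_STATE / CFI_RESTORE_STATE pair around the shared epilogue: the exception-handling tails placed after the ret jump back to .Lende while the callee-saved registers are still on the stack, so the unwind state that was in effect before the epilogue has to be reinstated for them. A minimal sketch of that idea with an illustrative function, not taken from the patch:

ENTRY(demo_with_fixup)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	/* ... body; a fault branches to .Ldemo_error ... */

	CFI_REMEMBER_STATE          /* snapshot: %rbx still saved, CFA = %rsp + 16 */
.Ldemo_exit:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE           /* the code below runs before the epilogue, not after it */

.Ldemo_error:
	movq $-1,%rax               /* illustrative error value */
	jmp .Ldemo_exit
	CFI_ENDPROC
ENDPROC(demo_with_fixup)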