From 8c0b5f148eccd0a4d9714e72f50f13c586af59ec Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Wed, 24 Feb 2010 15:44:29 -0800 Subject: [PATCH 01/31] Fix assertion in palloc and pvalloc as well. --- ChangeLog | 6 ++++++ malloc/malloc.c | 16 +++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index d541766e7e..5271d24ac5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2010-02-24 Ulrich Drepper + + [BZ #5553] + * malloc/malloc.c (public_vALLOc): Set ar_ptr when trying main_arena. + (public_pVALLOc): Likewise. + 2010-02-22 Jim Meyering * manual/math.texi (BSD Random): Fix a typo: s/are/is/ diff --git a/malloc/malloc.c b/malloc/malloc.c index b43e454f6e..9d60b7d173 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1,5 +1,5 @@ /* Malloc implementation for multiple threads without lock contention. - Copyright (C) 1996-2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1996-2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Wolfram Gloger and Doug Lea , 2001. @@ -3933,9 +3933,10 @@ public_vALLOc(size_t bytes) if(!p) { /* Maybe the failure is due to running out of mmapped areas. */ if(ar_ptr != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - p = _int_memalign(&main_arena, pagesz, bytes); - (void)mutex_unlock(&main_arena.mutex); + ar_ptr = &main_arena; + (void)mutex_lock(&ar_ptr->mutex); + p = _int_memalign(ar_ptr, pagesz, bytes); + (void)mutex_unlock(&ar_ptr->mutex); } else { #if USE_ARENAS /* ... or sbrk() has failed and there is still a chance to mmap() */ @@ -3978,9 +3979,10 @@ public_pVALLOc(size_t bytes) if(!p) { /* Maybe the failure is due to running out of mmapped areas. 
*/ if(ar_ptr != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - p = _int_memalign(&main_arena, pagesz, rounded_bytes); - (void)mutex_unlock(&main_arena.mutex); + ar_ptr = &main_arena; + (void)mutex_lock(&ar_ptr->mutex); + p = _int_memalign(ar_ptr, pagesz, rounded_bytes); + (void)mutex_unlock(&ar_ptr->mutex); } else { #if USE_ARENAS /* ... or sbrk() has failed and there is still a chance to mmap() */ From 7ca890b88e6ab7624afb1742a9fffb37ad5b3fc3 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 Feb 2010 16:07:57 -0800 Subject: [PATCH 02/31] Fix reporting of I/O errors in *dprintf functions. --- ChangeLog | 5 +++++ libio/iovdprintf.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5271d24ac5..25691cd6ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,13 @@ 2010-02-24 Ulrich Drepper + [BZ #11319] + * libio/iovdprintf.c (_IO_vdprintf): Explicitly flush stream before + undoing the stream because _IO_FINISH doesn't report failures. + [BZ #5553] * malloc/malloc.c (public_vALLOc): Set ar_ptr when trying main_arena. (public_pVALLOc): Likewise. + Patch by Petr Baudis. 2010-02-22 Jim Meyering diff --git a/libio/iovdprintf.c b/libio/iovdprintf.c index edab849a44..5284ff8938 100644 --- a/libio/iovdprintf.c +++ b/libio/iovdprintf.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006 +/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -60,6 +60,9 @@ _IO_vdprintf (d, format, arg) done = INTUSE(_IO_vfprintf) (&tmpfil.file, format, arg); + if (done != EOF && _IO_do_flush (&tmpfil.file) == EOF) + done = EOF; + _IO_FINISH (&tmpfil.file); return done; From cc50f1a4b458f769ceb72d88bb78c8429361fec1 Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Wed, 24 Feb 2010 18:11:35 -0800 Subject: [PATCH 03/31] Fix issues in x86 memset-sse2.S/memset-sse2-rep.S --- ChangeLog | 15 +++++++++++++++ sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 14 +++++++------- sysdeps/i386/i686/multiarch/memset-sse2.S | 19 +++++++++++-------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 25691cd6ec..26429c65c9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2010-02-24 H.J. Lu + + * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Remove redundant + punpcklbw. + Use unsigned conditional jumps. + (128bytesormore_nt): Renamed to ... + (128bytesormore_endof_L1): This. + Use add instead of lea if possible. + Correct unwind info. + * sysdeps/i386/i686/multiarch/memset-sse2.S: Remove redundant + punpcklbw. + Use unsigned conditional jumps. + Use add instead of lea if possible. + Correct unwind info. + 2010-02-24 Ulrich Drepper [BZ #11319] diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S index 84afffeb66..f9a0b13d0c 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S @@ -243,7 +243,6 @@ L(32bytesormore): pxor %xmm0, %xmm0 #else movd %eax, %xmm0 - punpcklbw %xmm0, %xmm0 pshufd $0, %xmm0, %xmm0 #endif testl $0xf, %edx @@ -261,7 +260,7 @@ L(not_aligned_16): ALIGN (4) L(aligned_16): cmp $128, %ecx - jge L(128bytesormore) + jae L(128bytesormore) L(aligned_16_less128bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) @@ -293,7 +292,7 @@ L(128bytesormore): * fast string will prefetch and combine data efficiently. 
*/ cmp %edi, %ecx - jae L(128bytesormore_nt) + jae L(128bytesormore_endof_L1) subl $128, %ecx L(128bytesormore_normal): sub $128, %ecx @@ -306,7 +305,7 @@ L(128bytesormore_normal): movdqa %xmm0, 0x60(%edx) movdqa %xmm0, 0x70(%edx) lea 128(%edx), %edx - jl L(128bytesless_normal) + jb L(128bytesless_normal) sub $128, %ecx @@ -319,15 +318,16 @@ L(128bytesormore_normal): movdqa %xmm0, 0x60(%edx) movdqa %xmm0, 0x70(%edx) lea 128(%edx), %edx - jge L(128bytesormore_normal) + jae L(128bytesormore_normal) L(128bytesless_normal): POP (%edi) - lea 128(%ecx), %ecx + add $128, %ecx BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) + CFI_PUSH (%edi) ALIGN (4) -L(128bytesormore_nt): +L(128bytesormore_endof_L1): mov %edx, %edi mov %ecx, %edx shr $2, %ecx diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S index b2b979193e..92ad601bf2 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2.S @@ -243,7 +243,6 @@ L(32bytesormore): pxor %xmm0, %xmm0 #else movd %eax, %xmm0 - punpcklbw %xmm0, %xmm0 pshufd $0, %xmm0, %xmm0 #endif testl $0xf, %edx @@ -261,7 +260,7 @@ L(not_aligned_16): ALIGN (4) L(aligned_16): cmp $128, %ecx - jge L(128bytesormore) + jae L(128bytesormore) L(aligned_16_less128bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) @@ -287,14 +286,17 @@ L(128bytesormore): #ifdef DATA_CACHE_SIZE POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp $DATA_CACHE_SIZE, %ecx #else # ifdef SHARED +# define RESTORE_EBX_STATE call __i686.get_pc_thunk.bx add $_GLOBAL_OFFSET_TABLE_, %ebx cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx # else POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp __x86_data_cache_size, %ecx # endif #endif @@ -312,7 +314,7 @@ L(128bytesormore_normal): movdqa %xmm0, 0x60(%edx) movdqa %xmm0, 0x70(%edx) lea 128(%edx), %edx - jl L(128bytesless_normal) + jb L(128bytesless_normal) sub $128, %ecx @@ -325,10 +327,10 @@ L(128bytesormore_normal): movdqa %xmm0, 0x60(%edx) 
movdqa %xmm0, 0x70(%edx) lea 128(%edx), %edx - jge L(128bytesormore_normal) + jae L(128bytesormore_normal) L(128bytesless_normal): - lea 128(%ecx), %ecx + add $128, %ecx BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) ALIGN (4) @@ -346,11 +348,12 @@ L(128bytes_L2_normal): movaps %xmm0, 0x70(%edx) add $128, %edx cmp $128, %ecx - jge L(128bytes_L2_normal) + jae L(128bytes_L2_normal) L(128bytesless_L2_normal): BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) + RESTORE_EBX_STATE L(128bytesormore_nt_start): sub %ebx, %ecx ALIGN (4) @@ -368,7 +371,7 @@ L(128bytesormore_shared_cache_loop): movdqa %xmm0, 0x70(%edx) add $0x80, %edx cmp $0x80, %ebx - jge L(128bytesormore_shared_cache_loop) + jae L(128bytesormore_shared_cache_loop) cmp $0x80, %ecx jb L(shared_cache_loop_end) ALIGN (4) @@ -384,7 +387,7 @@ L(128bytesormore_nt): movntdq %xmm0, 0x70(%edx) add $0x80, %edx cmp $0x80, %ecx - jge L(128bytesormore_nt) + jae L(128bytesormore_nt) sfence L(shared_cache_loop_end): #if defined DATA_CACHE_SIZE || !defined SHARED From a0ac24d98ace90d1ccba6a2f3e7d55600f2fdb6e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 24 Feb 2010 18:20:57 -0800 Subject: [PATCH 04/31] Fix issues in x86 memcpy-ssse3.S --- ChangeLog | 4 + sysdeps/i386/i686/multiarch/memcpy-ssse3.S | 113 +++++++++++++-------- 2 files changed, 77 insertions(+), 40 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26429c65c9..2932b053b3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2010-02-24 H.J. Lu + * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Use unsigned + conditional jumps. + Correct unwind info. + * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Remove redundant punpcklbw. Use unsigned conditional jumps. 
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S index 749c82d379..ec9eeb95e4 100644 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S +++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S @@ -128,7 +128,7 @@ ENTRY (MEMCPY) jb L(copy_forward) je L(fwd_write_0bytes) cmp $32, %ecx - jge L(memmove_bwd) + jae L(memmove_bwd) jmp L(bk_write_less32bytes_2) L(memmove_bwd): add %ecx, %eax @@ -139,12 +139,12 @@ L(memmove_bwd): L(copy_forward): #endif cmp $48, %ecx - jge L(48bytesormore) + jae L(48bytesormore) L(fwd_write_less32bytes): #ifndef USE_AS_MEMMOVE cmp %dl, %al - jl L(bk_write) + jb L(bk_write) #endif add %ecx, %edx add %ecx, %eax @@ -162,6 +162,7 @@ L(48bytesormore): movl %edx, %edi and $-16, %edx PUSH (%esi) + cfi_remember_state add $16, %edx movl %edi, %esi sub %edx, %edi @@ -181,12 +182,14 @@ L(48bytesormore): #endif mov %eax, %edi - jge L(large_page) + jae L(large_page) and $0xf, %edi jz L(shl_0) BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_0): movdqu %xmm0, (%esi) @@ -202,7 +205,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -210,7 +213,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -218,7 +221,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -234,6 +237,7 @@ L(shl_0_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + CFI_PUSH (%edi) L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF @@ -250,7 +254,7 @@ L(shl_0_gobble): POP (%edi) lea -128(%ecx), %ecx - jge L(shl_0_gobble_mem_loop) + jae L(shl_0_gobble_mem_loop) 
L(shl_0_gobble_cache_loop): movdqa (%eax), %xmm0 movdqa 0x10(%eax), %xmm1 @@ -272,8 +276,7 @@ L(shl_0_gobble_cache_loop): movdqa %xmm7, 0x70(%edx) lea 0x80(%edx), %edx - jge L(shl_0_gobble_cache_loop) -L(shl_0_gobble_cache_loop_tail): + jae L(shl_0_gobble_cache_loop) cmp $-0x40, %ecx lea 0x80(%ecx), %ecx jl L(shl_0_cache_less_64bytes) @@ -294,7 +297,7 @@ L(shl_0_gobble_cache_loop_tail): add $0x40, %edx L(shl_0_cache_less_64bytes): cmp $0x20, %ecx - jl L(shl_0_cache_less_32bytes) + jb L(shl_0_cache_less_32bytes) movdqa (%eax), %xmm0 sub $0x20, %ecx movdqa 0x10(%eax), %xmm1 @@ -304,7 +307,7 @@ L(shl_0_cache_less_64bytes): add $0x20, %edx L(shl_0_cache_less_32bytes): cmp $0x10, %ecx - jl L(shl_0_cache_less_16bytes) + jb L(shl_0_cache_less_16bytes) sub $0x10, %ecx movdqa (%eax), %xmm0 add $0x10, %eax @@ -342,7 +345,7 @@ L(shl_0_gobble_mem_loop): movdqa %xmm7, 0x70(%edx) lea 0x80(%edx), %edx - jge L(shl_0_gobble_mem_loop) + jae L(shl_0_gobble_mem_loop) cmp $-0x40, %ecx lea 0x80(%ecx), %ecx jl L(shl_0_mem_less_64bytes) @@ -363,7 +366,7 @@ L(shl_0_gobble_mem_loop): add $0x40, %edx L(shl_0_mem_less_64bytes): cmp $0x20, %ecx - jl L(shl_0_mem_less_32bytes) + jb L(shl_0_mem_less_32bytes) movdqa (%eax), %xmm0 sub $0x20, %ecx movdqa 0x10(%eax), %xmm1 @@ -373,7 +376,7 @@ L(shl_0_mem_less_64bytes): add $0x20, %edx L(shl_0_mem_less_32bytes): cmp $0x10, %ecx - jl L(shl_0_mem_less_16bytes) + jb L(shl_0_mem_less_16bytes) sub $0x10, %ecx movdqa (%eax), %xmm0 add $0x10, %eax @@ -384,7 +387,8 @@ L(shl_0_mem_less_16bytes): add %ecx, %eax BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_1): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -406,7 +410,7 @@ L(shl_1_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_1_end) + jb L(shl_1_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -428,6 +432,8 @@ L(shl_1_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + 
cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_2): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -449,7 +455,7 @@ L(shl_2_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_2_end) + jb L(shl_2_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -471,6 +477,8 @@ L(shl_2_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_3): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -492,7 +500,7 @@ L(shl_3_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_3_end) + jb L(shl_3_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -514,6 +522,8 @@ L(shl_3_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_4): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -535,7 +545,7 @@ L(shl_4_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_4_end) + jb L(shl_4_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -557,6 +567,8 @@ L(shl_4_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_5): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -578,7 +590,7 @@ L(shl_5_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_5_end) + jb L(shl_5_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -600,7 +612,8 @@ L(shl_5_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_6): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -622,7 +635,7 @@ L(shl_6_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_6_end) + jb L(shl_6_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -644,6 +657,8 @@ L(shl_6_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN 
(4) L(shl_7): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -665,7 +680,7 @@ L(shl_7_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_7_end) + jb L(shl_7_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -687,6 +702,8 @@ L(shl_7_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_8): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -708,7 +725,7 @@ L(shl_8_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_8_end) + jb L(shl_8_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -730,6 +747,8 @@ L(shl_8_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_9): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -751,7 +770,7 @@ L(shl_9_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_9_end) + jb L(shl_9_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -773,6 +792,8 @@ L(shl_9_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_10): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -794,7 +815,7 @@ L(shl_10_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_10_end) + jb L(shl_10_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -816,6 +837,8 @@ L(shl_10_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_11): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -837,7 +860,7 @@ L(shl_11_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_11_end) + jb L(shl_11_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -859,6 +882,8 @@ L(shl_11_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_12): 
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -880,7 +905,7 @@ L(shl_12_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_12_end) + jb L(shl_12_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -902,6 +927,8 @@ L(shl_12_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_13): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -923,7 +950,7 @@ L(shl_13_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_13_end) + jb L(shl_13_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -945,6 +972,8 @@ L(shl_13_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_14): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -966,7 +995,7 @@ L(shl_14_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_14_end) + jb L(shl_14_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -988,7 +1017,8 @@ L(shl_14_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_15): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -1010,7 +1040,7 @@ L(shl_15_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_15_end) + jb L(shl_15_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -1229,8 +1259,10 @@ L(fwd_write_3bytes): movl DEST(%esp), %eax # endif #endif - RETURN + RETURN_END + cfi_restore_state + cfi_remember_state ALIGN (4) L(large_page): movdqu (%eax), %xmm1 @@ -1281,7 +1313,7 @@ L(large_page_loop): sub $0x40, %ecx L(large_page_less_64bytes): cmp $32, %ecx - jl L(large_page_less_32bytes) + jb L(large_page_less_32bytes) movdqu (%eax), %xmm0 movdqu 0x10(%eax), %xmm1 lea 0x20(%eax), %eax @@ -1617,11 +1649,11 @@ L(copy_backward): L(bk_aligned_4): cmp $64, %ecx - jge L(bk_write_more64bytes) + jae L(bk_write_more64bytes) L(bk_write_64bytesless): 
cmp $32, %ecx - jl L(bk_write_less32bytes) + jb L(bk_write_less32bytes) L(bk_write_more32bytes): /* Copy 32 bytes at a time. */ @@ -1653,10 +1685,11 @@ L(bk_write_less32bytes): L(bk_write_less32bytes_2): BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) + CFI_PUSH (%esi) ALIGN (4) L(bk_align): cmp $8, %ecx - jle L(bk_write_less32bytes) + jbe L(bk_write_less32bytes) testl $1, %edx /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, then (EDX & 2) must be != 0. */ @@ -1712,7 +1745,7 @@ L(bk_ssse3_align): L(bk_ssse3_cpy_pre): cmp $64, %ecx - jl L(bk_write_more32bytes) + jb L(bk_write_more32bytes) L(bk_ssse3_cpy): sub $64, %esi @@ -1727,7 +1760,7 @@ L(bk_ssse3_cpy): movdqu (%esi), %xmm0 movdqa %xmm0, (%edx) cmp $64, %ecx - jge L(bk_ssse3_cpy) + jae L(bk_ssse3_cpy) jmp L(bk_write_64bytesless) #endif From 3093e0c713306755b364e59393e2ca18706d8a47 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 24 Feb 2010 18:26:30 -0800 Subject: [PATCH 05/31] Fix issues in x86 memcpy-ssse3-rep.S --- ChangeLog | 9 + .../i386/i686/multiarch/memcpy-ssse3-rep.S | 246 ++++++++++-------- 2 files changed, 149 insertions(+), 106 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2932b053b3..ec890ead08 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2010-02-24 H.J. Lu + * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S + (bk_write_less32bytes_2): Renamed to ... + (bk_write_less48bytes): This. + Use unsigned conditional jumps. + Correct unwind info. + Use add/sub instead of lea if possible. + (shl_0_gobble_cache_loop_tail): Removed. + (large_page): Properly adjust ECX. + * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Use unsigned conditional jumps. Correct unwind info. 
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S index b26037d279..48a109ccd6 100644 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S +++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S @@ -127,10 +127,8 @@ ENTRY (MEMCPY) cmp %eax, %edx jb L(copy_forward) je L(fwd_write_0bytes) - cmp $32, %ecx - jge L(memmove_bwd) - jmp L(bk_write_less32bytes_2) -L(memmove_bwd): + cmp $48, %ecx + jb L(bk_write_less48bytes) add %ecx, %eax cmp %eax, %edx movl SRC(%esp), %eax @@ -139,12 +137,12 @@ L(memmove_bwd): L(copy_forward): #endif cmp $48, %ecx - jge L(48bytesormore) + jae L(48bytesormore) L(fwd_write_less32bytes): #ifndef USE_AS_MEMMOVE cmp %dl, %al - jl L(bk_write) + jb L(bk_write) #endif add %ecx, %edx add %ecx, %eax @@ -162,6 +160,7 @@ L(48bytesormore): movl %edx, %edi and $-16, %edx PUSH (%esi) + cfi_remember_state add $16, %edx movl %edi, %esi sub %edx, %edi @@ -181,7 +180,7 @@ L(48bytesormore): #endif mov %eax, %edi - jge L(large_page) + jae L(large_page) and $0xf, %edi jz L(shl_0) @@ -201,7 +200,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -209,7 +208,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -217,7 +216,7 @@ L(shl_0_loop): movdqa %xmm0, (%edx, %edi) movdqa %xmm1, 16(%edx, %edi) lea 32(%edi), %edi - jl L(shl_0_end) + jb L(shl_0_end) movdqa (%eax, %edi), %xmm0 movdqa 16(%eax, %edi), %xmm1 @@ -234,6 +233,8 @@ L(shl_0_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF @@ -251,8 +252,8 @@ L(shl_0_gobble): shr $3, %esi sub %esi, %edi cmp %edi, %ecx - jge L(shl_0_gobble_mem_start) - lea -128(%ecx), %ecx + jae 
L(shl_0_gobble_mem_start) + sub $128, %ecx ALIGN (4) L(shl_0_gobble_cache_loop): movdqa (%eax), %xmm0 @@ -275,11 +276,10 @@ L(shl_0_gobble_cache_loop): movaps %xmm7, 0x70(%edx) lea 0x80(%edx), %edx - jge L(shl_0_gobble_cache_loop) -L(shl_0_gobble_cache_loop_tail): - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_cache_less_64bytes) + jae L(shl_0_gobble_cache_loop) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_cache_less_64bytes) movdqa (%eax), %xmm0 sub $0x40, %ecx @@ -297,7 +297,7 @@ L(shl_0_gobble_cache_loop_tail): add $0x40, %edx L(shl_0_cache_less_64bytes): cmp $0x20, %ecx - jl L(shl_0_cache_less_32bytes) + jb L(shl_0_cache_less_32bytes) movdqa (%eax), %xmm0 sub $0x20, %ecx movdqa 0x10(%eax), %xmm1 @@ -307,7 +307,7 @@ L(shl_0_cache_less_64bytes): add $0x20, %edx L(shl_0_cache_less_32bytes): cmp $0x10, %ecx - jl L(shl_0_cache_less_16bytes) + jb L(shl_0_cache_less_16bytes) sub $0x10, %ecx movdqa (%eax), %xmm0 add $0x10, %eax @@ -320,12 +320,13 @@ L(shl_0_cache_less_16bytes): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_0_gobble_mem_start): cmp %al, %dl je L(copy_page_by_rep) - lea -128(%ecx), %ecx + sub $128, %ecx L(shl_0_gobble_mem_loop): prefetchnta 0x1c0(%eax) prefetchnta 0x280(%eax) @@ -352,10 +353,10 @@ L(shl_0_gobble_mem_loop): movaps %xmm7, 0x70(%edx) lea 0x80(%edx), %edx - jge L(shl_0_gobble_mem_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_mem_less_64bytes) + jae L(shl_0_gobble_mem_loop) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_mem_less_64bytes) movdqa (%eax), %xmm0 sub $0x40, %ecx @@ -373,7 +374,7 @@ L(shl_0_gobble_mem_loop): add $0x40, %edx L(shl_0_mem_less_64bytes): cmp $0x20, %ecx - jl L(shl_0_mem_less_32bytes) + jb L(shl_0_mem_less_32bytes) movdqa (%eax), %xmm0 sub $0x20, %ecx movdqa 0x10(%eax), %xmm1 @@ -383,7 +384,7 @@ L(shl_0_mem_less_64bytes): add $0x20, %edx L(shl_0_mem_less_32bytes): cmp $0x10, %ecx - jl L(shl_0_mem_less_16bytes) + 
jb L(shl_0_mem_less_16bytes) sub $0x10, %ecx movdqa (%eax), %xmm0 add $0x10, %eax @@ -396,14 +397,15 @@ L(shl_0_mem_less_16bytes): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_1): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -1(%eax), %eax + sub $1, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_1_loop): @@ -418,7 +420,7 @@ L(shl_1_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_1_end) + jb L(shl_1_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -433,20 +435,22 @@ L(shl_1_loop): jae L(shl_1_loop) L(shl_1_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 1(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_2): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -2(%eax), %eax + sub $2, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_2_loop): @@ -461,7 +465,7 @@ L(shl_2_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_2_end) + jb L(shl_2_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -476,20 +480,22 @@ L(shl_2_loop): jae L(shl_2_loop) L(shl_2_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 2(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_3): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -3(%eax), %eax + sub $3, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_3_loop): @@ -504,7 +510,7 @@ L(shl_3_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_3_end) + jb L(shl_3_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -519,20 
+525,22 @@ L(shl_3_loop): jae L(shl_3_loop) L(shl_3_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 3(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_4): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -4(%eax), %eax + sub $4, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_4_loop): @@ -547,7 +555,7 @@ L(shl_4_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_4_end) + jb L(shl_4_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -562,20 +570,22 @@ L(shl_4_loop): jae L(shl_4_loop) L(shl_4_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 4(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_5): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -5(%eax), %eax + sub $5, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_5_loop): @@ -590,7 +600,7 @@ L(shl_5_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_5_end) + jb L(shl_5_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -605,21 +615,22 @@ L(shl_5_loop): jae L(shl_5_loop) L(shl_5_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 5(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_6): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -6(%eax), %eax + sub $6, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_6_loop): @@ -634,7 +645,7 @@ L(shl_6_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_6_end) + jb L(shl_6_end) movdqa 16(%eax, %edi), %xmm2 sub 
$32, %ecx @@ -649,20 +660,22 @@ L(shl_6_loop): jae L(shl_6_loop) L(shl_6_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 6(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_7): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -7(%eax), %eax + sub $7, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_7_loop): @@ -677,7 +690,7 @@ L(shl_7_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_7_end) + jb L(shl_7_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -692,20 +705,22 @@ L(shl_7_loop): jae L(shl_7_loop) L(shl_7_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 7(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_8): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -8(%eax), %eax + sub $8, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_8_loop): @@ -720,7 +735,7 @@ L(shl_8_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_8_end) + jb L(shl_8_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -735,20 +750,22 @@ L(shl_8_loop): jae L(shl_8_loop) L(shl_8_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 8(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_9): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -9(%eax), %eax + sub $9, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_9_loop): @@ -763,7 +780,7 @@ L(shl_9_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_9_end) + jb L(shl_9_end) movdqa 
16(%eax, %edi), %xmm2 sub $32, %ecx @@ -778,20 +795,22 @@ L(shl_9_loop): jae L(shl_9_loop) L(shl_9_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 9(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_10): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -10(%eax), %eax + sub $10, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_10_loop): @@ -806,7 +825,7 @@ L(shl_10_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_10_end) + jb L(shl_10_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -821,20 +840,22 @@ L(shl_10_loop): jae L(shl_10_loop) L(shl_10_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 10(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_11): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -11(%eax), %eax + sub $11, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_11_loop): @@ -849,7 +870,7 @@ L(shl_11_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_11_end) + jb L(shl_11_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -864,20 +885,22 @@ L(shl_11_loop): jae L(shl_11_loop) L(shl_11_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 11(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_12): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -12(%eax), %eax + sub $12, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_12_loop): @@ -892,7 +915,7 @@ L(shl_12_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, 
%edi) - jl L(shl_12_end) + jb L(shl_12_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -907,20 +930,22 @@ L(shl_12_loop): jae L(shl_12_loop) L(shl_12_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 12(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_13): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -13(%eax), %eax + sub $13, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_13_loop): @@ -935,7 +960,7 @@ L(shl_13_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_13_end) + jb L(shl_13_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -950,20 +975,22 @@ L(shl_13_loop): jae L(shl_13_loop) L(shl_13_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 13(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_14): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -14(%eax), %eax + sub $14, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_14_loop): @@ -978,7 +1005,7 @@ L(shl_14_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_14_end) + jb L(shl_14_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -993,21 +1020,22 @@ L(shl_14_loop): jae L(shl_14_loop) L(shl_14_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 14(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_15): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -15(%eax), %eax + sub $15, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_15_loop): @@ -1022,7 +1050,7 @@ 
L(shl_15_loop): movdqa %xmm2, -32(%edx, %edi) movdqa %xmm3, -16(%edx, %edi) - jl L(shl_15_end) + jb L(shl_15_end) movdqa 16(%eax, %edi), %xmm2 sub $32, %ecx @@ -1037,7 +1065,7 @@ L(shl_15_loop): jae L(shl_15_loop) L(shl_15_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 15(%edi, %eax), %eax @@ -1241,20 +1269,23 @@ L(fwd_write_3bytes): movl DEST(%esp), %eax # endif #endif - RETURN + RETURN_END + cfi_restore_state + cfi_remember_state ALIGN (4) L(large_page): movdqu (%eax), %xmm1 - lea 16(%eax), %eax movdqu %xmm0, (%esi) movntdq %xmm1, (%edx) - lea 16(%edx), %edx + add $0x10, %eax + add $0x10, %edx + sub $0x10, %ecx cmp %al, %dl je L(copy_page_by_rep) L(large_page_loop_init): POP (%esi) - lea -0x90(%ecx), %ecx + sub $0x80, %ecx POP (%edi) L(large_page_loop): prefetchnta 0x1c0(%eax) @@ -1280,9 +1311,9 @@ L(large_page_loop): movntdq %xmm7, 0x70(%edx) lea 0x80(%edx), %edx jae L(large_page_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(large_page_less_64bytes) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(large_page_less_64bytes) movdqu (%eax), %xmm0 movdqu 0x10(%eax), %xmm1 @@ -1298,7 +1329,7 @@ L(large_page_loop): sub $0x40, %ecx L(large_page_less_64bytes): cmp $32, %ecx - jl L(large_page_less_32bytes) + jb L(large_page_less_32bytes) movdqu (%eax), %xmm0 movdqu 0x10(%eax), %xmm1 lea 0x20(%eax), %eax @@ -1312,6 +1343,8 @@ L(large_page_less_32bytes): sfence BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(copy_page_by_rep): mov %eax, %esi @@ -1658,18 +1691,18 @@ L(table_48_bytes_bwd): L(copy_backward): PUSH (%esi) movl %eax, %esi - lea (%ecx,%edx,1),%edx - lea (%ecx,%esi,1),%esi + add %ecx, %edx + add %ecx, %esi testl $0x3, %edx jnz L(bk_align) L(bk_aligned_4): cmp $64, %ecx - jge L(bk_write_more64bytes) + jae L(bk_write_more64bytes) L(bk_write_64bytesless): cmp $32, %ecx - jl L(bk_write_less32bytes) + jb L(bk_write_less32bytes) L(bk_write_more32bytes): /* Copy 32 bytes at a 
time. */ @@ -1698,13 +1731,14 @@ L(bk_write_less32bytes): sub %ecx, %edx sub %ecx, %eax POP (%esi) -L(bk_write_less32bytes_2): +L(bk_write_less48bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) + CFI_PUSH (%esi) ALIGN (4) L(bk_align): cmp $8, %ecx - jle L(bk_write_less32bytes) + jbe L(bk_write_less32bytes) testl $1, %edx /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, then (EDX & 2) must be != 0. */ @@ -1760,7 +1794,7 @@ L(bk_ssse3_align): L(bk_ssse3_cpy_pre): cmp $64, %ecx - jl L(bk_write_more32bytes) + jb L(bk_write_more32bytes) L(bk_ssse3_cpy): sub $64, %esi @@ -1775,7 +1809,7 @@ L(bk_ssse3_cpy): movdqu (%esi), %xmm0 movdqa %xmm0, (%edx) cmp $64, %ecx - jge L(bk_ssse3_cpy) + jae L(bk_ssse3_cpy) jmp L(bk_write_64bytesless) #endif From 844c394a0501e4f46cb0496ddeaf9643ae2c1ebb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 Feb 2010 19:27:24 -0800 Subject: [PATCH 06/31] Fix comment. --- elf/dl-load.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/elf/dl-load.c b/elf/dl-load.c index 597193c043..e8c7be55f7 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -1,5 +1,5 @@ /* Map in a shared object's segments from the file. - Copyright (C) 1995-2005, 2006, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 1995-2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -313,7 +313,7 @@ static char * expand_dynamic_string_token (struct link_map *l, const char *s) { /* We make two runs over the string. First we determine how large the - resulting string is and then we copy it over. Since this is now + resulting string is and then we copy it over. Since this is no frequently executed operation we are looking here not for performance but rather for code size. 
*/ size_t cnt; @@ -391,7 +391,7 @@ fillin_rpath (char *rpath, struct r_search_path_elem **result, const char *sep, size_t len = strlen (cp); /* `strsep' can pass an empty string. This has to be - interpreted as `use the current directory'. */ + interpreted as `use the current directory'. */ if (len == 0) { static const char curwd[] = "./"; @@ -1519,7 +1519,7 @@ cannot enable executable stack as shared object requires"); /* Print search path. */ static void print_search_path (struct r_search_path_elem **list, - const char *what, const char *name) + const char *what, const char *name) { char buf[max_dirnamelen + max_capstrlen]; int first = 1; @@ -2044,7 +2044,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, fd = -1; /* When the object has the RUNPATH information we don't use any - RPATHs. */ + RPATHs. */ if (loader == NULL || loader->l_info[DT_RUNPATH] == NULL) { /* This is the executable's map (if there is one). Make sure that @@ -2067,7 +2067,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, } /* If dynamically linked, try the DT_RPATH of the executable - itself. NB: we do this for lookups in any namespace. */ + itself. NB: we do this for lookups in any namespace. */ if (fd == -1 && !did_main_map && main_map != NULL && main_map->l_type != lt_loaded && cache_rpath (main_map, &main_map->l_rpath_dirs, DT_RPATH, @@ -2164,7 +2164,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, /* Add another newline when we are tracing the library loading. 
*/ if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_LIBS, 0)) - _dl_debug_printf ("\n"); + _dl_debug_printf ("\n"); } else { From 9d2569846c520933546353856aafea86a80d9638 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 Feb 2010 19:47:09 -0800 Subject: [PATCH 07/31] Fix typos I added in malloc.c --- malloc/malloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/malloc/malloc.c b/malloc/malloc.c index 9d60b7d173..763852ea3b 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -3935,7 +3935,7 @@ public_vALLOc(size_t bytes) if(ar_ptr != &main_arena) { ar_ptr = &main_arena; (void)mutex_lock(&ar_ptr->mutex); - p = _int_memalign(&ar_ptr-> pagesz, bytes); + p = _int_memalign(ar_ptr, pagesz, bytes); (void)mutex_unlock(&ar_ptr->mutex); } else { #if USE_ARENAS @@ -3981,7 +3981,7 @@ public_pVALLOc(size_t bytes) if(ar_ptr != &main_arena) { ar_ptr = &main_arena; (void)mutex_lock(&ar_ptr->mutex); - p = _int_memalign(&ar_ptr-> pagesz, rounded_bytes); + p = _int_memalign(ar_ptr, pagesz, rounded_bytes); (void)mutex_unlock(&ar_ptr->mutex); } else { #if USE_ARENAS From 4a1297d761c3754ca2643ab6cd9e9cf952d77cf5 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 Feb 2010 20:00:30 -0800 Subject: [PATCH 08/31] We can use the 64-bit register versions of the double functions. --- ChangeLog | 4 ++++ sysdeps/x86_64/Implies | 1 + 2 files changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index ec890ead08..dc3c8d7347 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-02-24 Ulrich Drepper + + * sysdeps/x86_64/Implies: Add ieee754/dbl-64/wordsize-64 entry. + 2010-02-24 H.J. 
Lu * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S diff --git a/sysdeps/x86_64/Implies b/sysdeps/x86_64/Implies index 2b8412b0b6..2e0a323e13 100644 --- a/sysdeps/x86_64/Implies +++ b/sysdeps/x86_64/Implies @@ -1,4 +1,5 @@ wordsize-64 ieee754/ldbl-96 +ieee754/dbl-64/wordsize-64 ieee754/dbl-64 ieee754/flt-32 From dbcaf07c326e18b14d19aebe011b9ffbe4a45972 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Feb 2010 20:12:29 -0800 Subject: [PATCH 09/31] sparc: Reimplement 64-bit aligned copy routines and remove from memcpy files. 2010-02-25 David S. Miller * sysdeps/sparc/sparc64/Makefile: Add align-cpy rule. * sysdeps/sparc/sparc64/align-cpy.S: New. * sysdeps/sparc/sparc64/memcpy.S (__align_cpy_1, __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (__align_cpy_1, __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (__align_cpy_1, __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (__align_cpy_1, __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. --- ChangeLog | 16 +++++ sysdeps/sparc/sparc64/Makefile | 4 ++ sysdeps/sparc/sparc64/align-cpy.S | 85 ++++++++++++++++++++++++ sysdeps/sparc/sparc64/memcpy.S | 63 ------------------ sysdeps/sparc/sparc64/sparcv9b/memcpy.S | 7 -- sysdeps/sparc/sparc64/sparcv9v/memcpy.S | 7 -- sysdeps/sparc/sparc64/sparcv9v2/memcpy.S | 7 -- 7 files changed, 105 insertions(+), 84 deletions(-) create mode 100644 sysdeps/sparc/sparc64/align-cpy.S diff --git a/ChangeLog b/ChangeLog index ac36832f8e..1f49dbd8c6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2010-02-25 David S. Miller + + * sysdeps/sparc/sparc64/Makefile: Add align-cpy rule. + * sysdeps/sparc/sparc64/align-cpy.S: New. + * sysdeps/sparc/sparc64/memcpy.S (__align_cpy_1, __align_cpy_2, + __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. 
+ * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (__align_cpy_1, + __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): + Remove. + * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (__align_cpy_1, + __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): + Remove. + * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (__align_cpy_1, + __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): + Remove. + 2009-02-20 David S. Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_runtime_setup): diff --git a/sysdeps/sparc/sparc64/Makefile b/sysdeps/sparc/sparc64/Makefile index 3bb0238832..1a859dffc0 100644 --- a/sysdeps/sparc/sparc64/Makefile +++ b/sysdeps/sparc/sparc64/Makefile @@ -6,3 +6,7 @@ endif ifeq ($(subdir),csu) CFLAGS-initfini.s += -mcpu=v9 endif + +ifeq ($(subdir),string) +sysdep_routines += align-cpy +endif diff --git a/sysdeps/sparc/sparc64/align-cpy.S b/sysdeps/sparc/sparc64/align-cpy.S new file mode 100644 index 0000000000..bae788fe44 --- /dev/null +++ b/sysdeps/sparc/sparc64/align-cpy.S @@ -0,0 +1,85 @@ +/* Aligned copy routines specified by Sparc V9 ABI. + For 64-bit sparc. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include + + .text + .align 8 +ENTRY(__align_cpy_8) +10: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x08, %o2 + be,pn %xcc, 8f +1: ldx [%o1 + 0x00], %o5 + ldx [%o1 + 0x08], %o4 + subcc %o2, 0x10, %o2 + add %o1, 0x10, %o1 + stx %o5, [%o3 + 0x00] + stx %o4, [%o3 + 0x08] + bg,pt %xcc, 1b + add %o3, 0x10, %o3 + bne,pn %xcc, 9f + nop + ldx [%o1 + 0x00], %o5 +8: stx %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_8) + + .align 8 +ENTRY(__align_cpy_4) +20: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x04, %o2 + be,pn %xcc, 8f +1: lduw [%o1 + 0x00], %o5 + lduw [%o1 + 0x04], %o4 + subcc %o2, 0x08, %o2 + add %o1, 0x08, %o1 + stw %o5, [%o3 + 0x00] + stw %o4, [%o3 + 0x04] + bg,pt %xcc, 1b + add %o3, 0x08, %o3 + bne,pn %xcc, 9f + nop + lduw [%o1 + 0x00], %o5 +8: stw %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_4) + + .align 8 +ENTRY(__align_cpy_2) + or %o0, %o1, %o3 + or %o2, %o3, %o3 + andcc %o3, 0x7, %g0 + be,pt %xcc, 10b + andcc %o3, 0x3, %g0 + be,pt %xcc, 20b + mov %o7, %g1 + call HIDDEN_JUMPTARGET(memcpy) + mov %o7, %g1 +END(__align_cpy_2) + +weak_alias (__align_cpy_8, __align_cpy_16) +weak_alias (__align_cpy_2, __align_cpy_1) diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S index 5993358017..709d366304 100644 --- a/sysdeps/sparc/sparc64/memcpy.S +++ b/sysdeps/sparc/sparc64/memcpy.S @@ -446,65 +446,6 @@ ENTRY(__memcpy_large) mov %g4, %o0 END(__memcpy_large) -#ifdef USE_BPR - - /* void *__align_cpy_4(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3)) - */ - - .align 32 -ENTRY(__align_cpy_4) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 200b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - ba,pt %xcc, 216f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ -END(__align_cpy_4) - - /* void *__align_cpy_8(void *dest, void *src, size_t n) - 
* SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7)) - */ - - .align 32 -ENTRY(__align_cpy_8) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_8) - - /* void *__align_cpy_16(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15)) - */ - - .align 32 -ENTRY(__align_cpy_16) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, (64 * 6) /* IEU1 */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_16) - -#endif - .align 32 ENTRY(memcpy) 210: @@ -917,9 +858,5 @@ ENTRY(memmove) mov %g4, %o0 END(memmove) -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -#endif libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S index 760d526630..2a4146feeb 100644 --- a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S @@ -599,12 +599,5 @@ ENTRY(memmove) mov %g4, %o0 END(memmove) -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) -#endif libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S index ad2b0f742c..61db9ff81f 
100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S @@ -585,12 +585,5 @@ ENTRY(memmove) mov %g4, %o0 END(memmove) -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) -#endif libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S index b261f461a4..2b1444ec7f 100644 --- a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S @@ -735,12 +735,5 @@ ENTRY(memmove) mov %g4, %o0 END(memmove) -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) -#endif libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memmove) From e44f6f89da537816aff71d926a0b774a77545b48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Feb 2010 03:59:49 -0800 Subject: [PATCH 10/31] sparc: Pull bcopy/memmove out of memcpy.S implementations. 2010-02-25 David S. Miller * sysdeps/sparc/sparc32/bcopy.c: Delete. * sysdeps/sparc/sparc32/memmove.c: Delete. * sysdeps/sparc/sparc32/sparcv9/bcopy.c: Delete. * sysdeps/sparc/sparc32/sparcv9/memmove.c: Delete. * sysdeps/sparc/sparc64/bcopy.c: Delete. * sysdeps/sparc/sparc64/memmove.c: Delete. * sysdeps/sparc/sparc64/memcopy.h: New. * sysdeps/sparc/sparc32/memcpy.S (bcopy, memmove): Remove. * sysdeps/sparc/sparc64/memcpy.S (bcopy, memmove): Likewise. * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (bcopy, memmove): Likewise. * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (bcopy, memmove): Likewise. * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (bcopy, memmove): Likewise. 
--- ChangeLog | 13 + sysdeps/sparc/sparc32/bcopy.c | 1 - sysdeps/sparc/sparc32/memcpy.S | 347 +---------------------- sysdeps/sparc/sparc32/memmove.c | 1 - sysdeps/sparc/sparc32/sparcv9/bcopy.c | 1 - sysdeps/sparc/sparc32/sparcv9/memmove.c | 1 - sysdeps/sparc/sparc64/bcopy.c | 1 - sysdeps/sparc/sparc64/memcopy.h | 1 + sysdeps/sparc/sparc64/memcpy.S | 290 ------------------- sysdeps/sparc/sparc64/memmove.c | 1 - sysdeps/sparc/sparc64/sparcv9b/memcpy.S | 285 ------------------- sysdeps/sparc/sparc64/sparcv9v/memcpy.S | 249 ---------------- sysdeps/sparc/sparc64/sparcv9v2/memcpy.S | 249 ---------------- 13 files changed, 15 insertions(+), 1425 deletions(-) delete mode 100644 sysdeps/sparc/sparc32/bcopy.c delete mode 100644 sysdeps/sparc/sparc32/memmove.c delete mode 100644 sysdeps/sparc/sparc32/sparcv9/bcopy.c delete mode 100644 sysdeps/sparc/sparc32/sparcv9/memmove.c delete mode 100644 sysdeps/sparc/sparc64/bcopy.c create mode 100644 sysdeps/sparc/sparc64/memcopy.h delete mode 100644 sysdeps/sparc/sparc64/memmove.c diff --git a/ChangeLog b/ChangeLog index 1f49dbd8c6..ed2409a539 100644 --- a/ChangeLog +++ b/ChangeLog @@ -14,6 +14,19 @@ __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove. + * sysdeps/sparc/sparc32/bcopy.c: Delete. + * sysdeps/sparc/sparc32/memmove.c: Delete. + * sysdeps/sparc/sparc32/sparcv9/bcopy.c: Delete. + * sysdeps/sparc/sparc32/sparcv9/memmove.c: Delete. + * sysdeps/sparc/sparc64/bcopy.c: Delete. + * sysdeps/sparc/sparc64/memmove.c: Delete. + * sysdeps/sparc/sparc64/memcopy.h: New. + * sysdeps/sparc/sparc32/memcpy.S (bcopy, memmove): Remove. + * sysdeps/sparc/sparc64/memcpy.S (bcopy, memmove): Likewise. + * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (bcopy, memmove): Likewise. + * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (bcopy, memmove): Likewise. + * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (bcopy, memmove): Likewise. + 2009-02-20 David S. 
Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_runtime_setup): diff --git a/sysdeps/sparc/sparc32/bcopy.c b/sysdeps/sparc/sparc32/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/memcpy.S b/sysdeps/sparc/sparc32/memcpy.S index 6bd55c06a1..c9c7c40e81 100644 --- a/sysdeps/sparc/sparc32/memcpy.S +++ b/sysdeps/sparc/sparc32/memcpy.S @@ -68,45 +68,6 @@ stb %t0, [%dst - offset - 0x02]; \ stb %t1, [%dst - offset - 0x01]; -/* Both these macros have to start with exactly the same insn */ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - st %t0, [%dst - offset - 0x20]; \ - st %t1, [%dst - offset - 0x1c]; \ - st %t2, [%dst - offset - 0x18]; \ - st %t3, [%dst - offset - 0x14]; \ - st %t4, [%dst - offset - 0x10]; \ - st %t5, [%dst - offset - 0x0c]; \ - st %t6, [%dst - offset - 0x08]; \ - st %t7, [%dst - offset - 0x04]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - std %t0, [%dst - offset - 0x20]; \ - std %t2, [%dst - offset - 0x18]; \ - std %t4, [%dst - offset - 0x10]; \ - std %t6, [%dst - offset - 0x08]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src + offset + 0x00], %t0; \ - ldd [%src + offset + 0x08], %t2; \ - st %t0, [%dst + offset + 0x00]; \ - st %t1, [%dst + offset + 0x04]; \ - st %t2, [%dst + offset + 0x08]; \ - st %t3, [%dst + offset + 0x0c]; - -#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src + offset + 0x00], %t0; \ - ldub [%src + offset + 0x01], %t1; \ - stb %t0, [%dst + offset + 0x00]; \ - stb %t1, [%dst + offset + 
0x01]; - #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ ldd [%src + offset + 0x00], %t0; \ ldd [%src + offset + 0x08], %t2; \ @@ -146,312 +107,6 @@ .text .align 4 -ENTRY(bcopy) - mov %o0, %o3 - mov %o1, %o0 - mov %o3, %o1 -END(bcopy) - -ENTRY(memmove) - cmp %o0, %o1 - st %o0, [%sp + 64] - bleu 9f - sub %o0, %o1, %o4 - - add %o1, %o2, %o3 - cmp %o3, %o0 - bleu 0f - andcc %o4, 3, %o5 - - add %o1, %o2, %o1 - add %o0, %o2, %o0 - bne 77f - cmp %o2, 15 - bleu 91f - andcc %o1, 3, %g0 - be 3f - nop - - andcc %o1, 1, %g0 - be 4f - andcc %o1, 2, %g0 - - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - stb %g2, [%o0 - 1] - sub %o2, 1, %o2 - be 3f - sub %o0, 1, %o0 -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o2, 2, %o2 - sub %o0, 2, %o0 - -3: andcc %o1, 4, %g0 - - be 2f - mov %o2, %g1 - - ld [%o1 - 4], %o4 - sub %g1, 4, %g1 - st %o4, [%o0 - 4] - sub %o1, 4, %o1 - sub %o0, 4, %o0 -2: andcc %g1, 0xffffff80, %g6 - be 3f - andcc %o0, 4, %g0 - - be 74f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 5b - sub %o0, 128, %o0 - -3: andcc %g1, 0x70, %g6 - be 72f - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -101: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72f - 101b), %g0 - sub %o0, %g6, %o0 - -71: RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) -72: be 73f - andcc %g1, 4, %g0 - - ldd [%o1 - 0x08], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - st 
%g2, [%o0] - st %g3, [%o0 + 0x04] -73: be 1f - andcc %g1, 2, %g0 - - ld [%o1 - 4], %g2 - sub %o1, 4, %o1 - st %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be 1f - andcc %g1, 1, %g0 - - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be 1f - nop - - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 - -74: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 74b - sub %o0, 128, %o0 - - andcc %g1, 0x70, %g6 - be 72b - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -102: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72b - 102b), %g0 - sub %o0, %g6, %o0 - -75: and %o2, 0xe, %o3 - mov %o7, %g2 - sll %o3, 3, %o4 - sub %o0, %o3, %o0 -103: call 100f - sub %o1, %o3, %o1 - mov %g2, %o7 - jmpl %o5 + (76f - 103b), %g0 - andcc %o2, 1, %g0 - - RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) - -76: be 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 - -91: bne 75b - andcc %o2, 8, %g0 - - be 1f - andcc %o2, 4, %g0 - - ld [%o1 - 0x08], %g2 - ld [%o1 - 0x04], %g3 - sub %o1, 8, %o1 - st %g2, [%o0 - 0x08] - st %g3, [%o0 - 0x04] - sub %o0, 8, %o0 -1: b 73b - mov %o2, %g1 - -77: cmp %o2, 15 - bleu 75b - andcc %o0, 3, %g0 - be 64f - andcc %o0, 1, %g0 - be 63f - andcc %o0, 2, %g0 - ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - stb %g5, [%o0 - 1] - sub %o0, 1, %o0 - be 64f - sub %o2, 1, %o2 - -63: ldub [%o1 - 1], %g5 - sub %o1, 2, %o1 - stb %g5, [%o0 - 1] - sub %o0, 2, %o0 - ldub [%o1], 
%g5 - sub %o2, 2, %o2 - stb %g5, [%o0] -64: and %o1, 3, %g2 - and %o1, -4, %o1 - and %o2, 0xc, %g3 - add %o1, 4, %o1 - cmp %g3, 4 - sll %g2, 3, %g4 - mov 32, %g2 - be 4f - sub %g2, %g4, %g6 - - blu 3f - cmp %g3, 8 - - be 2f - srl %o2, 2, %g3 - - ld [%o1 - 4], %o3 - add %o0, -8, %o0 - ld [%o1 - 8], %o4 - add %o1, -16, %o1 - b 7f - add %g3, 1, %g3 -2: ld [%o1 - 4], %o4 - add %o0, -4, %o0 - ld [%o1 - 8], %g1 - add %o1, -12, %o1 - b 8f - add %g3, 2, %g3 -3: ld [%o1 - 4], %o5 - add %o0, -12, %o0 - ld [%o1 - 8], %o3 - add %o1, -20, %o1 - b 6f - srl %o2, 2, %g3 -4: ld [%o1 - 4], %g1 - srl %o2, 2, %g3 - ld [%o1 - 8], %o5 - add %o1, -24, %o1 - add %o0, -16, %o0 - add %g3, -1, %g3 - - ld [%o1 + 12], %o3 -5: sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 12] -6: ld [%o1 + 8], %o4 - sll %o3, %g4, %g2 - srl %o5, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 8] -7: ld [%o1 + 4], %g1 - sll %o4, %g4, %g2 - srl %o3, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 4] -8: ld [%o1], %o5 - sll %g1, %g4, %g2 - srl %o4, %g6, %g5 - addcc %g3, -4, %g3 - or %g2, %g5, %g2 - add %o1, -16, %o1 - st %g2, [%o0] - add %o0, -16, %o0 - bne,a 5b - ld [%o1 + 12], %o3 - sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - srl %g4, 3, %g3 - or %g2, %g5, %g2 - add %o1, %g3, %o1 - andcc %o2, 2, %g0 - st %g2, [%o0 + 12] - be 1f - andcc %o2, 1, %g0 - - ldub [%o1 + 15], %g5 - add %o1, -2, %o1 - stb %g5, [%o0 + 11] - add %o0, -2, %o0 - ldub [%o1 + 16], %g5 - stb %g5, [%o0 + 12] -1: be 1f - nop - ldub [%o1 + 15], %g5 - stb %g5, [%o0 + 11] -1: retl - ld [%sp + 64], %o0 - -78: andcc %o1, 1, %g0 - be 4f - andcc %o1, 2, %g0 - - ldub [%o1], %g2 - add %o1, 1, %o1 - stb %g2, [%o0] - sub %o2, 1, %o2 - bne 3f - add %o0, 1, %o0 -4: lduh [%o1], %g2 - add %o1, 2, %o1 - sth %g2, [%o0] - sub %o2, 2, %o2 - b 3f - add %o0, 2, %o0 -END(memmove) - ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ sub %o0, %o1, %o4 st %o0, [%sp + 64] @@ -968,5 +623,5 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ 110: retl sub %o7, %g6, %o5 
END(memcpy) + libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc32/memmove.c b/sysdeps/sparc/sparc32/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/bcopy.c b/sysdeps/sparc/sparc32/sparcv9/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/memmove.c b/sysdeps/sparc/sparc32/sparcv9/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/bcopy.c b/sysdeps/sparc/sparc64/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc64/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/memcopy.h b/sysdeps/sparc/sparc64/memcopy.h new file mode 100644 index 0000000000..ec978e3c80 --- /dev/null +++ b/sysdeps/sparc/sparc64/memcopy.h @@ -0,0 +1 @@ +#include diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S index 709d366304..71e7100658 100644 --- a/sysdeps/sparc/sparc64/memcpy.S +++ b/sysdeps/sparc/sparc64/memcpy.S @@ -136,79 +136,8 @@ stx %t0, [%dst - offset - 0x10]; \ stx %t1, [%dst - offset - 0x08]; - /* Macros for non-VIS memmove code. 
*/ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - .text .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 /* IEU0 Group */ - mov %o0, %g3 /* IEU1 */ - cmp %o4, %o2 /* IEU1 Group */ - mov %o1, %o0 /* IEU0 */ - bgeu,pt %XCC, 210f /* CTI */ - mov %g3, %o1 /* IEU0 
Group */ -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 */ -#endif - brnz,pn %o2, 220f /* CTI Group */ - add %o0, %o2, %o0 /* IEU0 */ - retl - nop -END(bcopy) - - .align 32 ENTRY(__memcpy_large) 200: be,pt %xcc, 201f /* CTI */ andcc %o0, 0x38, %g5 /* IEU1 Group */ @@ -640,223 +569,4 @@ ENTRY(memcpy) mov %g4, %o0 END(memcpy) - .align 32 -ENTRY(__memmove_slowpath) -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 -219: retl - nop -END(__memmove_slowpath) - - .align 32 -ENTRY(memmove) -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, 219b /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ 
- sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* 
IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb 
%g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/memmove.c b/sysdeps/sparc/sparc64/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc64/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S index 2a4146feeb..389e09d820 100644 --- a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S @@ -36,23 +36,6 @@ .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp 
%o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) /* Special/non-trivial issues of this code: * @@ -332,272 +315,4 @@ small_copy_unaligned: END(memcpy) -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, 
[%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, out /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - 
andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 
1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, 
[%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S index 61db9ff81f..116c7113ff 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S @@ -41,23 +41,6 @@ .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 ENTRY(memcpy) @@ -354,236 +337,4 @@ ENTRY(memcpy) END(memcpy) -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - 
stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %xcc, 229f - stb %o5, [%o0] -2: ldub [%o1 - 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %xcc, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, 
%o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, %o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %xcc, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %xcc, 232f - andcc %o1, 3, %g0 - be,a,pt %xcc, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %xcc, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %xcc, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %xcc, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %xcc, 235f - andcc %o0, 4, %g0 - be,pn %xcc, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %xcc, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw %g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %xcc, 211f - 
nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %xcc, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %xcc, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S index 2b1444ec7f..300b12f427 100644 --- a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S +++ b/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S @@ -143,23 +143,6 @@ .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 ENTRY(memcpy) @@ -504,236 +487,4 @@ 
ENTRY(memcpy) END(memcpy) -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %XCC, 229f - stb %o5, [%o0] -2: ldub [%o1 
- 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %XCC, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, %o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, %o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %XCC, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %XCC, 232f - andcc %o1, 3, %g0 - be,a,pt %XCC, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %XCC, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %XCC, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %XCC, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %XCC, 235f - andcc %o0, 4, %g0 - be,pn %XCC, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %XCC, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %XCC, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw 
%g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %XCC, 211f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %XCC, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %XCC, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %XCC, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %XCC, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) From 3afd5a3b555694361181f854f658f669105d0ad6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 25 Feb 2010 23:52:07 -0800 Subject: [PATCH 11/31] sparc: Add multiarch support for memset/bzero/memcpy. 2010-02-25 David S. 
Miller * sysdeps/sparc/elf/rtld-global-offsets.sym: New file. * sysdeps/sparc/Makefile (csu): Add rtld-global-offsets.sym to gen-as-const-headers. * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: New file. * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S: New file. * sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S: New file. * sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S: Move to... * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S: ...here. * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S: Move to... * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S: ...here. * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S: Move to... * sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S: ...here. * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S: Move to... * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S: ...here. * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S: Removed. * sysdeps/sparc/sparc64/multiarch/Makefile: New file. * sysdeps/sparc/sparc64/sparcv9v/memcpy.S: Move to... * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: ...here. * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S: Move to... * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: ...here. * sysdeps/sparc/sparc64/sparcv9b/memcpy.S: Move to... * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: ...here. * sysdeps/sparc/sparc64/sparcv9v/memset.S: Move to... * sysdeps/sparc/sparc64/multiarch/memset-niagara1.S: ...here. * sysdeps/sparc/sparc64/sparcv9v2/memset.S: Removed. * sysdeps/sparc/sparc64/multiarch/memcpy.S: New file. * sysdeps/sparc/sparc64/multiarch/memset.S: New file. 
--- ChangeLog | 28 ++++ sysdeps/sparc/Makefile | 5 + sysdeps/sparc/elf/rtld-global-offsets.sym | 7 + .../sparc/sparc32/sparcv9/multiarch/Makefile | 4 + .../sparcv9/multiarch/memcpy-niagara1.S | 2 + .../sparcv9/multiarch/memcpy-niagara2.S | 2 + .../sparc32/sparcv9/multiarch/memcpy-ultra3.S | 2 + .../sparc/sparc32/sparcv9/multiarch/memcpy.S | 4 + .../sparcv9/multiarch/memset-niagara1.S | 2 + .../sparc/sparc32/sparcv9/multiarch/memset.S | 4 + .../sparc/sparc32/sparcv9/sparcv9b/memcpy.S | 2 - .../sparc/sparc32/sparcv9/sparcv9v/memcpy.S | 2 - .../sparc/sparc32/sparcv9/sparcv9v/memset.S | 2 - .../sparc/sparc32/sparcv9/sparcv9v2/memcpy.S | 2 - .../sparc/sparc32/sparcv9/sparcv9v2/memset.S | 2 - sysdeps/sparc/sparc64/multiarch/Makefile | 4 + .../memcpy.S => multiarch/memcpy-niagara1.S} | 12 +- .../memcpy.S => multiarch/memcpy-niagara2.S} | 12 +- .../memcpy.S => multiarch/memcpy-ultra3.S} | 8 +- sysdeps/sparc/sparc64/multiarch/memcpy.S | 107 +++++++++++++ .../memset.S => multiarch/memset-niagara1.S} | 17 +- sysdeps/sparc/sparc64/multiarch/memset.S | 145 ++++++++++++++++++ sysdeps/sparc/sparc64/sparcv9v2/memset.S | 1 - 23 files changed, 344 insertions(+), 32 deletions(-) create mode 100644 sysdeps/sparc/elf/rtld-global-offsets.sym create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S delete mode 100644 sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S delete mode 100644 sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S delete mode 100644 sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S delete mode 100644 
sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S delete mode 100644 sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S create mode 100644 sysdeps/sparc/sparc64/multiarch/Makefile rename sysdeps/sparc/sparc64/{sparcv9v/memcpy.S => multiarch/memcpy-niagara1.S} (98%) rename sysdeps/sparc/sparc64/{sparcv9v2/memcpy.S => multiarch/memcpy-niagara2.S} (99%) rename sysdeps/sparc/sparc64/{sparcv9b/memcpy.S => multiarch/memcpy-ultra3.S} (98%) create mode 100644 sysdeps/sparc/sparc64/multiarch/memcpy.S rename sysdeps/sparc/sparc64/{sparcv9v/memset.S => multiarch/memset-niagara1.S} (94%) create mode 100644 sysdeps/sparc/sparc64/multiarch/memset.S delete mode 100644 sysdeps/sparc/sparc64/sparcv9v2/memset.S diff --git a/ChangeLog b/ChangeLog index ed2409a539..f21abc5c3e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -27,6 +27,34 @@ * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (bcopy, memmove): Likewise. * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (bcopy, memmove): Likewise. + * sysdeps/sparc/elf/rtld-global-offsets.sym: New file. + * sysdeps/sparc/Makefile (csu): Add rtld-global-offsets.sym to + gen-as-const-headers. + * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S: New file. + * sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S: Move to... + * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S: ...here. + * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S: Move to... + * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S: ...here. + * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S: Move to... + * sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S: ...here. + * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S: Move to... + * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S: ...here. + * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S: Removed. + * sysdeps/sparc/sparc64/multiarch/Makefile: New file. 
+ * sysdeps/sparc/sparc64/sparcv9v/memcpy.S: Move to... + * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: ...here. + * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S: Move to... + * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: ...here. + * sysdeps/sparc/sparc64/sparcv9b/memcpy.S: Move to... + * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: ...here. + * sysdeps/sparc/sparc64/sparcv9v/memset.S: Move to... + * sysdeps/sparc/sparc64/multiarch/memset-niagara1.S: ...here. + * sysdeps/sparc/sparc64/sparcv9v2/memset.S: Removed. + * sysdeps/sparc/sparc64/multiarch/memcpy.S: New file. + * sysdeps/sparc/sparc64/multiarch/memset.S: New file. + 2009-02-20 David S. Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_runtime_setup): diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile index 73b926554e..735e4a40db 100644 --- a/sysdeps/sparc/Makefile +++ b/sysdeps/sparc/Makefile @@ -10,3 +10,8 @@ endif ifeq ($(subdir),db2) CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_SPARC_GCC=1 endif + +ifeq ($(subdir),csu) +# get offset to rtld_global._dl_hwcap +gen-as-const-headers += rtld-global-offsets.sym +endif diff --git a/sysdeps/sparc/elf/rtld-global-offsets.sym b/sysdeps/sparc/elf/rtld-global-offsets.sym new file mode 100644 index 0000000000..ff4e97f2a6 --- /dev/null +++ b/sysdeps/sparc/elf/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S 
b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S new file mode 100644 index 0000000000..10aef85fe1 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S new file mode 100644 index 0000000000..6b1bf6ea70 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S @@ -0,0 +1,2 @@ +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..77adf151aa --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S @@ -0,0 +1,2 @@ +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S new file mode 100644 index 0000000000..14df91e005 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S @@ -0,0 +1,4 @@ +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S new file mode 100644 index 0000000000..b432420876 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S new file mode 100644 index 0000000000..8f8264337d --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S @@ -0,0 +1,4 @@ +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S deleted file mode 100644 index 61960dce61..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S +++ 
/dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S deleted file mode 100644 index 4c05f57bc2..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S deleted file mode 100644 index 5e46c7489f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S deleted file mode 100644 index 7f4606037c..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S deleted file mode 100644 index 72de7bb0cf..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S similarity index 98% rename from sysdeps/sparc/sparc64/sparcv9v/memcpy.S rename to sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S index 116c7113ff..6a78295e81 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S @@ -36,6 +36,8 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch 
.register %g6,#scratch @@ -43,10 +45,10 @@ .text .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara1) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -335,6 +337,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) +END(__memcpy_niagara1) -libc_hidden_builtin_def (memcpy) +#endif diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S similarity index 99% rename from sysdeps/sparc/sparc64/sparcv9v2/memcpy.S rename to sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S index 300b12f427..35f6989aca 100644 --- a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S @@ -138,6 +138,8 @@ LOAD(ldd, base + 0x28, %x5); \ LOAD(ldd, base + 0x30, %x6); +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch @@ -145,10 +147,10 @@ .text .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara2) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -485,6 +487,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) +END(__memcpy_niagara2) -libc_hidden_builtin_def (memcpy) +#endif diff --git a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S similarity index 98% rename from sysdeps/sparc/sparc64/sparcv9b/memcpy.S rename to sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S index 389e09d820..34ca089f93 100644 --- a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S @@ -31,6 +31,8 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch @@ -52,7 +54,7 @@ * of up to 2.4GB per second. 
*/ .align 32 -ENTRY(memcpy) +ENTRY(__memcpy_ultra3) 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 @@ -313,6 +315,6 @@ small_copy_unaligned: retl mov %g5, %o0 -END(memcpy) +END(__memcpy_ultra3) -libc_hidden_builtin_def (memcpy) +#endif \ No newline at end of file diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S new file mode 100644 index 0000000000..a708de10e2 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S @@ -0,0 +1,107 @@ +/* Multiple versions of memcpy + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + +#if !defined NOT_IN_libc + .text +ENTRY(memcpy) + .type memcpy, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x80, %g0 ! HWCAP_SPARC_N2 + be 1f + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara2), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 +# else + set __memcpy_niagara2, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o2, 0x20, %g0 ! HWCAP_SPARC_ULTRA3 +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara1), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 +# else + set __memcpy_niagara1, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra3), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 +# else + set __memcpy_ultra3, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra1), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 +# else + set __memcpy_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memcpy) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library. 
*/ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ultra1 + +#define memcpy __memcpy_ultra1 + +#endif + +#include "../memcpy.S" diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S similarity index 94% rename from sysdeps/sparc/sparc64/sparcv9v/memset.S rename to sysdeps/sparc/sparc64/multiarch/memset-niagara1.S index 64817b8871..20ea056216 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memset.S +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S @@ -29,12 +29,14 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .text .align 32 -ENTRY(memset) +ENTRY(__memset_niagara1) /* %o0=buf, %o1=pat, %o2=len */ and %o1, 0xff, %o3 mov %o2, %o1 @@ -45,14 +47,14 @@ ENTRY(memset) sllx %o2, 32, %g1 ba,pt %XCC, 1f or %g1, %o2, %o2 -END(memset) +END(__memset_niagara1) -ENTRY(__bzero) +ENTRY(__bzero_niagara1) clr %o2 1: -#ifndef USE_BRP +# ifndef USE_BRP srl %o1, 0, %o1 -#endif +# endif brz,pn %o1, 90f mov %o0, %o3 @@ -125,7 +127,6 @@ ENTRY(__bzero) 90: retl mov %o3, %o0 -END(__bzero) +END(__bzero_niagara1) -libc_hidden_builtin_def (memset) -weak_alias (__bzero, bzero) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S new file mode 100644 index 0000000000..23e513f18f --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset.S @@ -0,0 +1,145 @@ +/* Multiple versions of memset and bzero + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +#if !defined NOT_IN_libc + .text +ENTRY(memset) + .type memset, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! 
HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara1), %o1 + xor %o1, %gdop_lox10(__memset_niagara1), %o1 +# else + set __memset_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __memset_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memset) + +ENTRY(__bzero) + .type bzero, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara1), %o1 + xor %o1, %gdop_lox10(__bzero_niagara1), %o1 +# else + set __bzero_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __bzero_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__bzero) + +weak_alias (__bzero, bzero) + +# undef weak_alias +# define weak_alias(a, b) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library. 
*/ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_ultra1 + +#define memset __memset_ultra1 +#define __bzero __bzero_ultra1 + +#endif + +#include "../memset.S" diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memset.S b/sysdeps/sparc/sparc64/sparcv9v2/memset.S deleted file mode 100644 index 809d3ed9c6..0000000000 --- a/sysdeps/sparc/sparc64/sparcv9v2/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include From dc21aae6c11c65b26cc27fc1172d669956fbacb7 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 26 Feb 2010 19:54:50 -0800 Subject: [PATCH 12/31] Fix unwind info in x86 strcmp-sse4.S --- ChangeLog | 5 +++++ sysdeps/i386/i686/multiarch/strcmp-sse4.S | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index dc3c8d7347..e72551b471 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-02-20 H.J. Lu + + * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Use cfi_remember_state + and cfi_restore_state only if USE_AS_STRNCMP is defined. + 2010-02-24 Ulrich Drepper * sysdeps/x86_64/Implies: Add ieee754/dbl-64/wordsize-64 entry. diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S index d5fd23e15c..81d6ec66f7 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -178,7 +178,9 @@ L(first4bytes): PUSH (%ebx) PUSH (%edi) PUSH (%esi) +#ifdef USE_AS_STRNCMP cfi_remember_state +#endif mov %edx, %edi mov %eax, %esi xorl %eax, %eax @@ -246,8 +248,8 @@ L(ret): ret .p2align 4 - cfi_restore_state #ifdef USE_AS_STRNCMP + cfi_restore_state L(more16byteseq): POP (%esi) POP (%edi) From 741895aa0a5e00a70c37f8f097af55cb9a8318e9 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 26 Feb 2010 19:56:04 -0800 Subject: [PATCH 13/31] Add BZ number. 
--- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index e72551b471..4b03d9dbfd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ 2010-02-20 H.J. Lu + [BZ #11332] * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Use cfi_remember_state and cfi_restore_state only if USE_AS_STRNCMP is defined. From f2899a62cfd037ac59dddb4dc62c7a2d91238fa7 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 28 Feb 2010 16:26:27 -0800 Subject: [PATCH 14/31] Define NT_X86_XSTATE constant for ELF core files. --- ChangeLog | 4 ++++ elf/elf.h | 1 + 2 files changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4b03d9dbfd..27b4e35e4a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-02-28 Roland McGrath + + * elf/elf.h (NT_X86_XSTATE): New macro. + 2010-02-20 H.J. Lu [BZ #11332] diff --git a/elf/elf.h b/elf/elf.h index 204a0f9e19..b6b6410c51 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -619,6 +619,7 @@ typedef struct #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ /* Legal values for the note segment descriptor types for object files. */ From 7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Mar 2010 02:10:22 -0800 Subject: [PATCH 15/31] sparc: Use ba,a,pt in PLTs and fix bugs in R_SPARC_JMP_IREL handling. 2010-03-03 David S. Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must pass '1' for 't' argument to sparc_fixup_plt. * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel): Likewise. * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define. (sparc_fixup_plt): Document 't' argument. Enable branch optimization and use v9 branches when possible. Explain why we cannot unconditionally patch the branch into the first PLT instruction. 
* sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't' argument. Use v9 branches when possible. Explain why we can in fact unconditionally use a branch in the first PLT instruction here. --- ChangeLog | 16 ++++++++++ sysdeps/sparc/sparc32/dl-machine.h | 2 +- sysdeps/sparc/sparc32/dl-plt.h | 47 ++++++++++++++++++++++++++---- sysdeps/sparc/sparc64/dl-machine.h | 2 +- sysdeps/sparc/sparc64/dl-plt.h | 29 ++++++++++++++---- 5 files changed, 83 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index b8118dd8f0..bd9f6ecab4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2010-03-03 David S. Miller + + * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must + pass '1' for 't' argument to sparc_fixup_plt. + * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel): + Likewise. + * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define. + (sparc_fixup_plt): Document 't' argument. Enable branch + optimization and use v9 branches when possible. Explain why we + cannot unconditionally patch the branch into the first PLT + instruction. + * sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't' + argument. Use v9 branches when possible. Explain why we can in + fact unconditionally use a branch in the first PLT instruction + here. + 2010-02-28 Roland McGrath * elf/elf.h (NT_X86_XSTATE): New macro. 
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h index 53257104a6..9631db32e1 100644 --- a/sysdeps/sparc/sparc32/dl-machine.h +++ b/sysdeps/sparc/sparc32/dl-machine.h @@ -563,7 +563,7 @@ elf_machine_lazy_rel (struct link_map *map, { Elf32_Addr value = map->l_addr + reloc->r_addend; value = ((Elf32_Addr (*) (void)) value) (); - sparc_fixup_plt (reloc, reloc_addr, value, 0, 1); + sparc_fixup_plt (reloc, reloc_addr, value, 1, 1); } else if (r_type == R_SPARC_NONE) ; diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h index edcc5c1374..bfb891fe69 100644 --- a/sysdeps/sparc/sparc32/dl-plt.h +++ b/sysdeps/sparc/sparc32/dl-plt.h @@ -25,19 +25,55 @@ #define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */ #define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */ #define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */ +#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */ static inline __attribute__ ((always_inline)) Elf32_Addr sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, Elf32_Addr value, int t, int do_flush) { - Elf32_Sword disp = value - (Elf32_Addr) reloc_addr; + Elf32_Sword disp; - if (0 && disp >= -0x800000 && disp < 0x800000) + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. */ + reloc_addr += t; + disp = value - (Elf32_Addr) reloc_addr; + + if (disp >= -0x800000 && disp < 0x800000) { - /* Don't need to worry about thread safety. We're writing just one - instruction. */ + unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff); + +#ifdef __sparc_v9__ + /* On V9 we can do even better by using a branch with + prediction if we fit into the even smaller 19-bit + displacement field. 
*/ + if (disp >= -0x100000 && disp < 0x100000) + insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff); +#endif + + /* Even if we are writing just a single branch, we must not + ignore the 't' offset. Consider a case where we have some + PLT slots which can be optimized into a single branch and + some which cannot. Then we can end up with a PLT which looks + like: + + PLT4.0: sethi %(PLT_4_INDEX), %g1 + sethi %(fully_resolved_sym_4), %g1 + jmp %g1 + %lo(fully_resolved_sym_4) + PLT5.0: ba,a fully_resolved_sym_5 + ba,a PLT0.0 + ... + + The delay slot of that jmp must always be either a sethi to + %g1 or a nop. But if we try to place this displacement + branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1 + instruction and then go immediately to + fully_resolved_sym_5. */ - reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff); + reloc_addr[0] = insn; if (do_flush) __asm __volatile ("flush %0" : : "r"(reloc_addr)); } @@ -48,7 +84,6 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, need not be done during bootstrapping, since there are no threads. But we also can't tell if we _can_ use flush, so don't. */ - reloc_addr += t; reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff); if (do_flush) __asm __volatile ("flush %0+4" : : "r"(reloc_addr)); diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h index 4c915eb586..fcfbb06ac2 100644 --- a/sysdeps/sparc/sparc64/dl-machine.h +++ b/sysdeps/sparc/sparc64/dl-machine.h @@ -661,7 +661,7 @@ elf_machine_lazy_rel (struct link_map *map, { /* 'high' is always zero, for large PLT entries the linker emits an R_SPARC_IRELATIVE. 
*/ - sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0); + sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1); } else *reloc_addr = value; diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h index e06be43a0a..ca2fe3bbd8 100644 --- a/sysdeps/sparc/sparc64/dl-plt.h +++ b/sysdeps/sparc/sparc64/dl-plt.h @@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr; Elf64_Sxword disp = value - plt_vaddr; - /* Now move plt_vaddr up to the call instruction. */ + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. + + Now move plt_vaddr up to the call instruction. */ plt_vaddr += ((t + 1) * 4); /* PLT entries .PLT32768 and above look always the same. */ @@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, /* Near destination. */ else if (disp >= -0x800000 && disp < 0x800000) { - /* As this is just one instruction, it is thread safe and so - we can avoid the unnecessary sethi FOO, %g1. - b,a target */ - insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff); + unsigned int insn; + + /* ba,a */ + insn = 0x30800000 | ((disp >> 2) & 0x3fffff); + + if (disp >= -0x100000 && disp < 0x100000) + { + /* ba,a,pt %icc */ + insn = 0x30480000 | ((disp >> 2) & 0x07ffff); + } + + /* As this is just one instruction, it is thread safe and so we + can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT + entry is 8 instructions long, so we can't run into the 'jmp' + delay slot problems 32-bit PLTs can. 
*/ + insns[0] = insn; __asm __volatile ("flush %0" : : "r" (insns)); } /* 32-bit Sparc style, the target is in the lower 32-bits of From 321029f1c35db68c9d2e15408871030a36694541 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 3 Mar 2010 02:11:46 -0800 Subject: [PATCH 16/31] sparc64: Fix msgrcv() msgrcv() does not work on sparc64, as it passes the 6th argument using the ipc kludge, while the kernel waits for a 6 arguments syscall. This patches fixes the problem by using a sparc64 specific version of msgrcv.c. 2010-03-03 Aurelien Jarno * sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c: New file. --- ChangeLog | 4 ++ .../unix/sysv/linux/sparc/sparc64/msgrcv.c | 49 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c diff --git a/ChangeLog b/ChangeLog index bd9f6ecab4..b3b1acf224 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-03-03 Aurelien Jarno + + * sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c: New file. + 2010-03-03 David S. Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c b/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c new file mode 100644 index 0000000000..117762c779 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include + +#include +#include + +#include + +ssize_t +__libc_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg) + int msqid; + void *msgp; + size_t msgsz; + long int msgtyp; + int msgflg; +{ + if (SINGLE_THREAD_P) + return INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg, + CHECK_N (msgp, msgsz), msgtyp); + + int oldtype = LIBC_CANCEL_ASYNC (); + + ssize_t result = INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg, + CHECK_N (msgp, msgsz), msgtyp); + + LIBC_CANCEL_RESET (oldtype); + + return result; +} +weak_alias (__libc_msgrcv, msgrcv) From 2e9337f58ab3165b54acd8ebd84577e3a51f79be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Mar 2010 05:58:09 -0800 Subject: [PATCH 17/31] (INSERT_WORDS64): Fix argument order. --- ChangeLog | 4 ++++ math/math_private.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b3b1acf224..a9e7a04b4f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-03-03 David S. Miller + + * math/math_private.h (INSERT_WORDS64): Fix argument order. + 2010-03-03 Aurelien Jarno * sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c: New file. diff --git a/math/math_private.h b/math/math_private.h index fade7e1181..e5ca61f0b8 100644 --- a/math/math_private.h +++ b/math/math_private.h @@ -111,7 +111,7 @@ do { \ } while (0) /* Get all in one, efficient on 64-bit machines. */ -#define INSERT_WORDS64(i,d) \ +#define INSERT_WORDS64(d,i) \ do { \ ieee_double_shape_type iw_u; \ iw_u.word = (i); \ From d8c34cd19fca0b33776db2ce727cbc03fbe04df1 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 3 Mar 2010 15:37:12 -0800 Subject: [PATCH 18/31] More in.h definitions from the Linux kernel. 
--- ChangeLog | 5 +++++ sysdeps/unix/sysv/linux/bits/in.h | 30 +++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index a9e7a04b4f..d4a70ae76b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-03-03 Ulrich Drepper + + * sysdeps/unix/sysv/linux/bits/in.h: Add a few more defintions from the + kernel headers. + 2010-03-03 David S. Miller * math/math_private.h (INSERT_WORDS64): Fix argument order. diff --git a/sysdeps/unix/sysv/linux/bits/in.h b/sysdeps/unix/sysv/linux/bits/in.h index b457a1790f..0aa0d6638a 100644 --- a/sysdeps/unix/sysv/linux/bits/in.h +++ b/sysdeps/unix/sysv/linux/bits/in.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-1999, 2000, 2004, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1991-1999, 2000, 2004, 2008, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -56,14 +56,26 @@ # define MCAST_INCLUDE 1 #endif -#define IP_ROUTER_ALERT 5 /* bool */ -#define IP_PKTINFO 8 /* bool */ -#define IP_PKTOPTIONS 9 -#define IP_PMTUDISC 10 /* obsolete name? */ -#define IP_MTU_DISCOVER 10 /* int; see below */ -#define IP_RECVERR 11 /* bool */ -#define IP_RECVTTL 12 /* bool */ -#define IP_RECVTOS 13 /* bool */ +#define IP_ROUTER_ALERT 5 /* bool */ +#define IP_PKTINFO 8 /* bool */ +#define IP_PKTOPTIONS 9 +#define IP_PMTUDISC 10 /* obsolete name? */ +#define IP_MTU_DISCOVER 10 /* int; see below */ +#define IP_RECVERR 11 /* bool */ +#define IP_RECVTTL 12 /* bool */ +#define IP_RECVTOS 13 /* bool */ +#define IP_MTU 14 /* int */ +#define IP_FREEBIND 15 +#define IP_IPSEC_POLICY 16 +#define IP_XFRM_POLICY 17 +#define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 + +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + +#define IP_MINTTL 21 /* IP_MTU_DISCOVER arguments. */ From 605277213e41805f45390adb122e6a320d571f6d Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 4 Mar 2010 01:02:45 -0800 Subject: [PATCH 19/31] sparc64: Use dbl-64/wordsize-64 math lib code. * sysdeps/sparc/sparc64/Implies: Add ieee754/dbl-64/wordsize-64 entry. --- ChangeLog | 4 ++++ sysdeps/sparc/sparc64/Implies | 1 + 2 files changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index a9e7a04b4f..94220551dc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-03-04 David S. Miller + + * sysdeps/sparc/sparc64/Implies: Add ieee754/dbl-64/wordsize-64 entry. + 2010-03-03 David S. Miller * math/math_private.h (INSERT_WORDS64): Fix argument order. diff --git a/sysdeps/sparc/sparc64/Implies b/sysdeps/sparc/sparc64/Implies index 01bf14e73f..7abc50efcc 100644 --- a/sysdeps/sparc/sparc64/Implies +++ b/sysdeps/sparc/sparc64/Implies @@ -1,6 +1,7 @@ wordsize-64 # SPARC uses IEEE 754 floating point. ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 ieee754/dbl-64 ieee754/flt-32 sparc/sparc64/soft-fp From 65ba5b4661fc328e7ac2a709cdf9470cb80b2491 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 4 Mar 2010 04:24:15 -0800 Subject: [PATCH 20/31] sparc32: Remove unused code. 2010-03-04 David S. Miller * sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S: Delete. * sysdeps/sparc/sparc32/udiv_qrnnd.S: Delete. --- ChangeLog | 3 + sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S | 215 --------------------- sysdeps/sparc/sparc32/udiv_qrnnd.S | 168 ---------------- 3 files changed, 3 insertions(+), 383 deletions(-) delete mode 100644 sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S delete mode 100644 sysdeps/sparc/sparc32/udiv_qrnnd.S diff --git a/ChangeLog b/ChangeLog index 94220551dc..8277d14736 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,9 @@ * sysdeps/sparc/sparc64/Implies: Add ieee754/dbl-64/wordsize-64 entry. + * sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S: Delete. + * sysdeps/sparc/sparc32/udiv_qrnnd.S: Delete. + 2010-03-03 David S. Miller * math/math_private.h (INSERT_WORDS64): Fix argument order. 
diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S deleted file mode 100644 index c3f097118f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S +++ /dev/null @@ -1,215 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. - -! Copyright (C) 1993, 1994 Free Software Foundation, Inc. - -! This file is part of the GNU MP Library. - -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. - -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. - -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - -! INPUT PARAMETERS -! rem_ptr o0 -! n1 o1 -! n0 o2 -! 
d o3 - -#include "sysdep.h" - -ENTRY(__udiv_qrnnd) - tst %o3 - bneg LOC(largedivisor) - mov 8,%g1 - - b LOC(p1) - addxcc %o2,%o2,%o2 - -LOC(plop): - bcc LOC(n1) - addxcc %o2,%o2,%o2 -LOC(p1): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n2) - addxcc %o2,%o2,%o2 -LOC(p2): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n3) - addxcc %o2,%o2,%o2 -LOC(p3): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n4) - addxcc %o2,%o2,%o2 -LOC(p4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(plop) - subcc %o1,%o3,%o4 - bcc LOC(n5) - addxcc %o2,%o2,%o2 -LOC(p5): - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(nlop): - bcc LOC(p1) - addxcc %o2,%o2,%o2 -LOC(n1): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p2) - addxcc %o2,%o2,%o2 -LOC(n2): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p3) - addxcc %o2,%o2,%o2 -LOC(n3): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p4) - addxcc %o2,%o2,%o2 -LOC(n4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(nlop) - subcc %o4,%o3,%o1 - bcc LOC(p5) - addxcc %o2,%o2,%o2 -LOC(n5): - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(largedivisor): - and %o2,1,%o5 ! %o5 = n0 & 1 - - srl %o2,1,%o2 - sll %o1,31,%g2 - or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1) - srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1) - - and %o3,1,%g2 - srl %o3,1,%g3 ! %g3 = floor(d / 2) - add %g3,%g2,%g3 ! %g3 = ceil(d / 2) - - b LOC(Lp1) - addxcc %o2,%o2,%o2 - -LOC(Lplop): - bcc LOC(Ln1) - addxcc %o2,%o2,%o2 -LOC(Lp1): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln2) - addxcc %o2,%o2,%o2 -LOC(Lp2): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln3) - addxcc %o2,%o2,%o2 -LOC(Lp3): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln4) - addxcc %o2,%o2,%o2 -LOC(Lp4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(Lplop) - subcc %o1,%g3,%o4 - bcc LOC(Ln5) - addxcc %o2,%o2,%o2 -LOC(Lp5): - add %o1,%o1,%o1 ! 
<< 1 - tst %g2 - bne LOC(Oddp) - add %o5,%o1,%o1 - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Lnlop): - bcc LOC(Lp1) - addxcc %o2,%o2,%o2 -LOC(Ln1): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp2) - addxcc %o2,%o2,%o2 -LOC(Ln2): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp3) - addxcc %o2,%o2,%o2 -LOC(Ln3): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp4) - addxcc %o2,%o2,%o2 -LOC(Ln4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(Lnlop) - subcc %o4,%g3,%o1 - bcc LOC(Lp5) - addxcc %o2,%o2,%o2 -LOC(Ln5): - add %o4,%o4,%o4 ! << 1 - tst %g2 - bne LOC(Oddn) - add %o5,%o4,%o4 - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Oddp): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o1 - addcc %o1,%o2,%o1 - bcc LOC(Lp6) - addx %o2,0,%o2 - sub %o1,%o3,%o1 -LOC(Lp6): - subcc %o1,%o3,%g0 - bcs LOC(Lp7) - subx %o2,-1,%o2 - sub %o1,%o3,%o1 -LOC(Lp7): - st %o1,[%o0] - retl - mov %o2,%o0 - -LOC(Oddn): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o4 - addcc %o4,%o2,%o4 - bcc LOC(Ln6) - addx %o2,0,%o2 - sub %o4,%o3,%o4 -LOC(Ln6): - subcc %o4,%o3,%g0 - bcs LOC(Ln7) - subx %o2,-1,%o2 - sub %o4,%o3,%o4 -LOC(Ln7): - st %o4,[%o0] - retl - mov %o2,%o0 - -END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S deleted file mode 100644 index 4955318a66..0000000000 --- a/sysdeps/sparc/sparc32/udiv_qrnnd.S +++ /dev/null @@ -1,168 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. -! -! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc. -! -! This file is part of the GNU MP Library. -! -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. -! -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! 
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. -! -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -! -! Added PIC support - May/96, Miguel de Icaza -! -! INPUT PARAMETERS -! rem_ptr i0 -! n1 i1 -! n0 i2 -! d i3 - -#include -#undef ret /* Kludge for glibc */ - -#ifdef PIC - .text -#else - .section .rodata,#alloc -#endif - .align 8 - - .type two_to_32,@object - .size two_to_32,8 -two_to_32: - .double 0r4294967296 - - .type two_to_31,@object - .size two_to_31,8 -two_to_31: - .double 0r2147483648 - - .text -ENTRY(__udiv_qrnnd) - !#PROLOGUE# 0 - save %sp,-104,%sp - !#PROLOGUE# 1 - st %i1,[%fp-8] - ld [%fp-8],%f10 -#ifdef PIC -LOC(base): - call 1f - fitod %f10,%f4 -1: ldd [%o7-(LOC(base)-two_to_32)],%f8 -#else - sethi %hi(two_to_32),%o7 - fitod %f10,%f4 - ldd [%o7+%lo(two_to_32)],%f8 -#endif - cmp %i1,0 - bge LOC(248) - mov %i0,%i5 - faddd %f4,%f8,%f4 -LOC(248): - st %i2,[%fp-8] - ld [%fp-8],%f10 - fmuld %f4,%f8,%f6 - cmp %i2,0 - bge LOC(249) - fitod %f10,%f2 - faddd %f2,%f8,%f2 -LOC(249): - st %i3,[%fp-8] - faddd %f6,%f2,%f2 - ld [%fp-8],%f10 - cmp %i3,0 - bge LOC(250) - fitod %f10,%f4 - faddd %f4,%f8,%f4 -LOC(250): - fdivd %f2,%f4,%f2 -#ifdef PIC - ldd [%o7-(LOC(base)-two_to_31)],%f4 -#else - sethi %hi(two_to_31),%o7 - ldd [%o7+%lo(two_to_31)],%f4 -#endif - fcmped %f2,%f4 - nop - fbge,a LOC(251) - fsubd %f2,%f4,%f2 - fdtoi %f2,%f2 - st %f2,[%fp-8] - b LOC(252) - ld [%fp-8],%i4 -LOC(251): - fdtoi %f2,%f2 - st %f2,[%fp-8] - ld [%fp-8],%i4 - sethi %hi(-2147483648),%g2 - xor %i4,%g2,%i4 -LOC(252): - wr %g0,%i4,%y - sra %i3,31,%g2 - and %i4,%g2,%g2 - andcc %g0,0,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc 
%g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,0,%g1 - add %g1,%g2,%i0 - rd %y,%g3 - subcc %i2,%g3,%o7 - subxcc %i1,%i0,%g0 - be LOC(253) - cmp %o7,%i3 - - add %i4,-1,%i0 - add %o7,%i3,%o7 - st %o7,[%i5] - ret - restore -LOC(253): - blu LOC(246) - mov %i4,%i0 - add %i4,1,%i0 - sub %o7,%i3,%o7 -LOC(246): - st %o7,[%i5] - ret - restore - -END(__udiv_qrnnd) From e39acb1f165c467e99509146c95f69c7444521e3 Mon Sep 17 00:00:00 2001 From: Richard Guenther Date: Thu, 4 Mar 2010 19:33:41 -0800 Subject: [PATCH 21/31] Fix R_X86_64_PC32 overflow detection --- ChangeLog | 5 +++++ sysdeps/x86_64/dl-machine.h | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1946aa666b..eac8ecd965 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-03-02 Richard Guenther + + * sysdeps/x86_64/dl-machine.h (elf_machine_rela): R_X86_64_PC32 + is sign-extending. + 2010-03-03 Ulrich Drepper * sysdeps/unix/sysv/linux/bits/in.h: Add a few more definitions from diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 61a0556d5e..f615e9591f 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. x86-64 version. - Copyright (C) 2001-2005, 2006, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2001-2006, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger . 
@@ -419,7 +419,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, case R_X86_64_PC32: value += reloc->r_addend - (Elf64_Addr) reloc_addr; *(unsigned int *) reloc_addr = value; - if (__builtin_expect (value != (unsigned int) value, 0)) + if (__builtin_expect (value != (int) value, 0)) { fmt = "\ %s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n"; From 3df9903e416a1c6d8157bb806c5fd342aabecbed Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 4 Mar 2010 19:49:04 -0800 Subject: [PATCH 22/31] Define new x86-64 relocation symbols --- ChangeLog | 5 +++++ elf/elf.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index eac8ecd965..e9832a632c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-03-04 Ulrich Drepper + + * elf/elf.h (R_X86_64_SIZE32): Define. + (R_X86_64_SIZE64): Define. + 2010-03-02 Richard Guenther * sysdeps/x86_64/dl-machine.h (elf_machine_rela): R_X86_64_PC32 diff --git a/elf/elf.h b/elf/elf.h index b6b6410c51..fd6236b8cf 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -2666,7 +2666,9 @@ typedef Elf32_Addr Elf32_Conflict; #define R_X86_64_GOTOFF64 25 /* 64 bit offset to GOT */ #define R_X86_64_GOTPC32 26 /* 32 bit signed pc relative offset to GOT */ -/* 27 .. 33 */ +/* 27 .. 31 */ +#define R_X86_64_SIZE32 32 /* Size of symbol plus 32-bit addend */ +#define R_X86_64_SIZE64 33 /* Size of symbol plus 64-bit addend */ #define R_X86_64_GOTPC32_TLSDESC 34 /* GOT offset for TLS descriptor. */ #define R_X86_64_TLSDESC_CALL 35 /* Marker for call through TLS descriptor. */ From 01f1f5ee8b58d5a3dce5250ed3514e9dfa5ceff4 Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Thu, 4 Mar 2010 21:30:17 -0800 Subject: [PATCH 23/31] Pass -mtune=i686 to assembler when compiling for i686 --- ChangeLog | 9 +++++++++ config.make.in | 1 + configure | 2 ++ configure.in | 1 + sysdeps/i386/configure | 19 +++++++++++++++++++ sysdeps/i386/configure.in | 8 ++++++++ sysdeps/i386/i686/Makefile | 16 ++++++++++++++++ 7 files changed, 56 insertions(+) diff --git a/ChangeLog b/ChangeLog index e9832a632c..57a64e9179 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2010-02-18 H.J. Lu + + * config.make.in (config-asflags-i686): Define. + * configure.in: Substitute libc_cv_as_i686. + * sysdeps/i386/configure.in: Check if assembler supports -mtune=i686. + * sysdeps/i386/i686/Makefile (CFLAGS-.oX): Add -Wa,-mtune=i686 + if assembler supports -mtune=i686. + (ASFLAGS-.oX): Likewise. + 2010-03-04 Ulrich Drepper * elf/elf.h (R_X86_64_SIZE32): Define. diff --git a/config.make.in b/config.make.in index d65706ceac..adfda9d9a4 100644 --- a/config.make.in +++ b/config.make.in @@ -36,6 +36,7 @@ asflags-cpu = @libc_cv_cc_submachine@ config-cflags-sse4 = @libc_cv_cc_sse4@ config-cflags-avx = @libc_cv_cc_avx@ +config-asflags-i686 = @libc_cv_as_i686@ defines = @DEFINES@ sysincludes = @SYSINCLUDES@ diff --git a/configure b/configure index 49ac30db46..b6752d147c 100755 --- a/configure +++ b/configure @@ -657,6 +657,7 @@ xcoff elf ldd_rewrite_script use_ldconfig +libc_cv_as_i686 libc_cv_cc_avx libc_cv_cc_sse4 libc_cv_cpp_asm_debuginfo @@ -8830,6 +8831,7 @@ fi + if test $elf = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_ELF 1 diff --git a/configure.in b/configure.in index 8e506026b8..56849dfc48 100644 --- a/configure.in +++ b/configure.in @@ -2323,6 +2323,7 @@ dnl sysdeps/CPU/configure.in checks set this via arch-specific asm tests AC_SUBST(libc_cv_cpp_asm_debuginfo) AC_SUBST(libc_cv_cc_sse4) AC_SUBST(libc_cv_cc_avx) +AC_SUBST(libc_cv_as_i686) AC_SUBST(use_ldconfig) AC_SUBST(ldd_rewrite_script) diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure 
index f0c2758a5e..7814b3b313 100644 --- a/sysdeps/i386/configure +++ b/sysdeps/i386/configure @@ -637,3 +637,22 @@ if test $libc_cv_cc_sse4 = yes; then _ACEOF fi + +{ $as_echo "$as_me:$LINENO: checking for assembler -mtune=i686 support" >&5 +$as_echo_n "checking for assembler -mtune=i686 support... " >&6; } +if test "${libc_cv_as_i686+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + libc_cv_as_i686=yes +else + libc_cv_as_i686=no +fi +fi +{ $as_echo "$as_me:$LINENO: result: $libc_cv_as_i686" >&5 +$as_echo "$libc_cv_as_i686" >&6; } diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in index 12dceaf844..9fc7fa59fe 100644 --- a/sysdeps/i386/configure.in +++ b/sysdeps/i386/configure.in @@ -47,3 +47,11 @@ fi]) if test $libc_cv_cc_sse4 = yes; then AC_DEFINE(HAVE_SSE4_SUPPORT) fi + +dnl Check if -Wa,-mtune=i686 works. 
+AC_CACHE_CHECK(for assembler -mtune=i686 support, libc_cv_as_i686, [dnl +if AC_TRY_COMMAND([${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null]); then + libc_cv_as_i686=yes +else + libc_cv_as_i686=no +fi]) diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile index dbcf1c33d3..e6b2924584 100644 --- a/sysdeps/i386/i686/Makefile +++ b/sysdeps/i386/i686/Makefile @@ -9,3 +9,19 @@ stack-align-test-flags += -msse ifeq ($(subdir),string) sysdep_routines += cacheinfo endif + +ifeq (yes,$(config-asflags-i686)) +CFLAGS-.o += -Wa,-mtune=i686 +CFLAGS-.os += -Wa,-mtune=i686 +CFLAGS-.op += -Wa,-mtune=i686 +CFLAGS-.og += -Wa,-mtune=i686 +CFLAGS-.ob += -Wa,-mtune=i686 +CFLAGS-.oS += -Wa,-mtune=i686 + +ASFLAGS-.o += -Wa,-mtune=i686 +ASFLAGS-.os += -Wa,-mtune=i686 +ASFLAGS-.op += -Wa,-mtune=i686 +ASFLAGS-.og += -Wa,-mtune=i686 +ASFLAGS-.ob += -Wa,-mtune=i686 +ASFLAGS-.oS += -Wa,-mtune=i686 +endif From 66f1b8eeb20c49c8566abeeacf36540f258ad5f4 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 5 Mar 2010 11:23:28 -0800 Subject: [PATCH 24/31] Fix setxid race with thread creation --- nptl/ChangeLog | 11 +++++++++++ nptl/allocatestack.c | 13 ++++++++++--- nptl/pthread_create.c | 9 ++++++++- nptl/sysdeps/pthread/createthread.c | 14 ++++++++++---- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/nptl/ChangeLog b/nptl/ChangeLog index c81eb03b75..e0e1a5392f 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,3 +1,14 @@ +2010-03-05 Andreas Schwab + Ulrich Drepper + + * allocatestack.c (setxid_mark_thread): Delay handling of thread if + it is creating a thread or it is just being created. + * pthread_create.c (start_thread): Wake setxid thread if it is + waiting. + (__pthread_create_2_1): Initialize setxid_futex. + * sysdeps/pthread/createthread.c (do_clone): Wake setxid thread if it + is waiting. 
+ 2010-01-15 Ulrich Drepper * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S: diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index 3c3585fe37..899c0e8eee 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc. +/* Copyright (C) 2002-2007, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -380,7 +380,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, - TLS_TCB_SIZE - adj); #elif TLS_DTV_AT_TP pd = (struct pthread *) (((uintptr_t) attr->stackaddr - - __static_tls_size - adj) + - __static_tls_size - adj) - TLS_PRE_TCB_SIZE); #endif @@ -546,7 +546,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, #ifndef __ASSUME_PRIVATE_FUTEX /* The thread must know when private futexes are supported. */ pd->header.private_futex = THREAD_GETMEM (THREAD_SELF, - header.private_futex); + header.private_futex); #endif #ifdef NEED_DL_SYSINFO @@ -969,6 +969,13 @@ setxid_mark_thread (struct xid_command *cmdp, struct pthread *t) { int ch; + /* Wait until this thread is cloned. */ + if (t->setxid_futex == -1 + && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1)) + do + lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE); + while (t->setxid_futex == -2); + /* Don't let the thread exit before the setxid handler runs. */ t->setxid_futex = 0; diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index 89938b3fb8..194a8ba0ab 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2007,2008,2009 Free Software Foundation, Inc. +/* Copyright (C) 2002-2007,2008,2009,2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -239,6 +239,10 @@ start_thread (void *arg) /* Initialize resolver state pointer. 
*/ __resp = &pd->res; + /* Allow setxid from now onwards. */ + if (__builtin_expect (atomic_exchange_acq (&pd->setxid_futex, 0) == -2, 0)) + lll_futex_wake (&pd->setxid_futex, 1, LLL_PRIVATE); + #ifdef __NR_set_robust_list # ifndef __ASSUME_SET_ROBUST_LIST if (__set_robust_list_avail >= 0) @@ -538,6 +542,9 @@ __pthread_create_2_1 (newthread, attr, start_routine, arg) } } + /* Don't allow setxid until cloned. */ + pd->setxid_futex = -1; + /* Pass the descriptor to the caller. */ *newthread = (pthread_t) pd; diff --git a/nptl/sysdeps/pthread/createthread.c b/nptl/sysdeps/pthread/createthread.c index 66fafe8050..3bb3915281 100644 --- a/nptl/sysdeps/pthread/createthread.c +++ b/nptl/sysdeps/pthread/createthread.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2007, 2008 Free Software Foundation, Inc. +/* Copyright (C) 2002-2007, 2008, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -28,7 +28,7 @@ #include "kernel-features.h" -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) +#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) /* Unless otherwise specified, the thread "register" is going to be initialized with a pointer to the TCB. */ @@ -72,8 +72,14 @@ do_clone (struct pthread *pd, const struct pthread_attr *attr, that cares whether the thread count is correct. */ atomic_increment (&__nptl_nthreads); - if (ARCH_CLONE (fct, STACK_VARIABLES_ARGS, clone_flags, - pd, &pd->tid, TLS_VALUE, &pd->tid) == -1) + int rc = ARCH_CLONE (fct, STACK_VARIABLES_ARGS, clone_flags, + pd, &pd->tid, TLS_VALUE, &pd->tid); + + /* Allow setxid from now onwards. */ + if (__builtin_expect (atomic_exchange_acq (&pd->setxid_futex, 0) == -2, 0)) + lll_futex_wake (&pd->setxid_futex, 1, LLL_PRIVATE); + + if (__builtin_expect (rc == -1, 0)) { atomic_decrement (&__nptl_nthreads); /* Oops, we lied for a second. 
*/ From a092b645f84c2af4a302f00e242be595ed1a2117 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Fri, 5 Mar 2010 16:52:31 -0800 Subject: [PATCH 25/31] Fix DEBUG statements in resolv/res_send.c --- ChangeLog | 6 ++++++ resolv/res_send.c | 39 ++++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 57a64e9179..9bdbb0a161 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2010-02-18 Yann Droneaud + + * resolv/res_send.c: Fixed DEBUG statements. + Moved tmpbuf declaration to block beginning and + updated pointer names used in multiple DprintQ() calls + 2010-02-18 H.J. Lu * config.make.in (config-asflags-i686): Define. diff --git a/resolv/res_send.c b/resolv/res_send.c index 28a47e42b8..b0966ae036 100644 --- a/resolv/res_send.c +++ b/resolv/res_send.c @@ -490,6 +490,9 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen, for (try = 0; try < statp->retry; try++) { for (ns = 0; ns < MAXNS; ns++) { +#ifdef DEBUG + char tmpbuf[40]; +#endif struct sockaddr_in6 *nsap = EXT(statp).nsaddrs[ns]; if (nsap == NULL) @@ -530,9 +533,6 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen, } #endif -#ifdef DEBUG - char tmpbuf[40]; -#endif Dprint(statp->options & RES_DEBUG, (stdout, ";; Querying server (# %d) address = %s\n", ns + 1, inet_ntop(AF_INET6, &nsap->sin6_addr, @@ -575,11 +575,12 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen, (statp->pfcode & RES_PRF_REPLY), (stdout, "%s", ""), ans, (resplen > anssiz) ? anssiz : resplen); - if (buf2 != NULL) + if (buf2 != NULL) { DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, "%s", ""), *ansp2, (*resplen2 > *nansp2) ? *nansp2 : *resplen2); + } /* * If we have temporarily opened a virtual circuit, @@ -883,7 +884,7 @@ send_vc(res_state statp, (statp->pfcode & RES_PRF_REPLY), (stdout, ";; old answer (unexpected):\n"), *thisansp, - (rlen > *thisanssiz) ? 
*thisanssiz: rlen); + (rlen > *thisanssizp) ? *thisanssizp: rlen); goto read_len; } @@ -1186,7 +1187,7 @@ send_dg(res_state statp, */ Dprint(statp->options & RES_DEBUG, (stdout, ";; undersized: %d\n", - *thisresplen)); + *thisresplenp)); *terrno = EMSGSIZE; goto err_out; } @@ -1201,8 +1202,8 @@ send_dg(res_state statp, (statp->pfcode & RES_PRF_REPLY), (stdout, ";; old answer:\n"), thisansp, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + (*thisresplenp > *thisanssizp) + ? *thisanssizp : *thisresplenp); goto wait; } if (!(statp->options & RES_INSECURE1) && @@ -1216,8 +1217,8 @@ send_dg(res_state statp, (statp->pfcode & RES_PRF_REPLY), (stdout, ";; not our server:\n"), thisansp, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + (*thisresplenp > *thisanssizp) + ? *thisanssizp : *thisresplenp); goto wait; } #ifdef RES_USE_EDNS0 @@ -1232,9 +1233,9 @@ send_dg(res_state statp, DprintQ(statp->options & RES_DEBUG, (stdout, "server rejected query with EDNS0:\n"), - thisans, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + thisansp, + (*thisresplenp > *thisanssizp) + ? *thisanssizp : *thisresplenp); /* record the error */ statp->_flags |= RES_F_EDNS0ERR; goto err_out; @@ -1258,8 +1259,8 @@ send_dg(res_state statp, (statp->pfcode & RES_PRF_REPLY), (stdout, ";; wrong query name:\n"), thisansp, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + (*thisresplenp > *thisanssizp) + ? *thisanssizp : *thisresplenp); goto wait; } if (anhp->rcode == SERVFAIL || @@ -1268,8 +1269,8 @@ send_dg(res_state statp, DprintQ(statp->options & RES_DEBUG, (stdout, "server rejected query:\n"), thisansp, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + (*thisresplenp > *thisanssizp) + ? 
*thisanssizp : *thisresplenp); if (recvresp1 || (buf2 != NULL && recvresp2)) return resplen; @@ -1295,8 +1296,8 @@ send_dg(res_state statp, DprintQ(statp->options & RES_DEBUG, (stdout, "referred query:\n"), thisansp, - (*thisresplen > *thisanssiz) - ? *thisanssiz : *thisresplen); + (*thisresplenp > *thisanssizp) + ? *thisanssizp : *thisresplenp); goto next_ns; } if (!(statp->options & RES_IGNTC) && anhp->tc) { From 6e0a06fa40320187544d8daccd87dca728192253 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 6 Mar 2010 15:40:50 -0800 Subject: [PATCH 26/31] Handle ext4 and logfs in statvfs functions. --- ChangeLog | 5 ++ sysdeps/unix/sysv/linux/internal_statvfs.c | 15 ++++-- sysdeps/unix/sysv/linux/linux_fsinfo.h | 61 ++++++++++++---------- 3 files changed, 48 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9bdbb0a161..0b18684168 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-03-06 Ulrich Drepper + + * sysdeps/unix/sysv/linux/internal_statvfs.c: Handle ext4 and logfs. + * sysdeps/unix/sysv/linux/linux_fsinfo.h: Add entry for logfs. + 2010-02-18 Yann Droneaud * resolv/res_send.c: Fixed DEBUG statements. diff --git a/sysdeps/unix/sysv/linux/internal_statvfs.c b/sysdeps/unix/sysv/linux/internal_statvfs.c index 28c1cb691f..9a6f4edac1 100644 --- a/sysdeps/unix/sysv/linux/internal_statvfs.c +++ b/sysdeps/unix/sysv/linux/internal_statvfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc. +/* Copyright (C) 1998-2006, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998. @@ -45,13 +45,15 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) const char *fsname = NULL; const char *fsname2 = NULL; + const char *fsname3 = NULL; /* Map the filesystem type we got from the statfs call to a string. 
*/ switch (fstype) { case EXT2_SUPER_MAGIC: - fsname = "ext3"; - fsname2 = "ext2"; + fsname = "ext4"; + fsname2 = "ext3"; + fsname3 = "ext2"; break; case DEVPTS_SUPER_MAGIC: fsname= "devpts"; @@ -98,6 +100,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) case NTFS_SUPER_MAGIC: fsname = "ntfs"; break; + case LOGFS_MAGIC_U32: + fsname = "logfs"; + break; } FILE *mtab = __setmntent ("/proc/mounts", "r"); @@ -126,7 +131,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) else if (fsname != NULL && strcmp (fsname, mntbuf.mnt_type) != 0 && (fsname2 == NULL - || strcmp (fsname2, mntbuf.mnt_type) != 0)) + || strcmp (fsname2, mntbuf.mnt_type) != 0) + && (fsname3 == NULL + || strcmp (fsname3, mntbuf.mnt_type) != 0)) continue; /* Find out about the device the current entry is for. */ diff --git a/sysdeps/unix/sysv/linux/linux_fsinfo.h b/sysdeps/unix/sysv/linux/linux_fsinfo.h index 8c6591ada3..b10e98b46f 100644 --- a/sysdeps/unix/sysv/linux/linux_fsinfo.h +++ b/sysdeps/unix/sysv/linux/linux_fsinfo.h @@ -1,5 +1,5 @@ /* Constants from kernel header for various FSes. - Copyright (C) 1998,1999,2000,2001,2002,2003,2005 Free Software Foundation, Inc. + Copyright (C) 1998-2003,2005,2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -25,22 +25,22 @@ filesystem types will become available we have to add the appropriate definitions here.*/ -/* Constants that identify the `adfs' filesystem. */ +/* Constant that identifies the `adfs' filesystem. */ #define ADFS_SUPER_MAGIC 0xadf5 -/* Constants that identify the `affs' filesystem. */ +/* Constant that identifies the `affs' filesystem. */ #define AFFS_SUPER_MAGIC 0xadff -/* Constants that identify the `autofs' filesystem. */ +/* Constant that identifies the `autofs' filesystem. */ #define AUTOFS_SUPER_MAGIC 0x187 -/* Constants that identify the `bfs' filesystem. 
*/ +/* Constant that identifies the `bfs' filesystem. */ #define BFS_MAGIC 0x1BADFACE -/* Constants that identify the `coda' filesystem. */ +/* Constant that identifies the `coda' filesystem. */ #define CODA_SUPER_MAGIC 0x73757245 -/* Constants that identify the `coherent' filesystem. */ +/* Constant that identifies the `coherent' filesystem. */ #define COH_SUPER_MAGIC 0x012ff7b7 /* Constant that identifies the `ramfs' filesystem. */ @@ -52,7 +52,7 @@ /* Constant that identifies the `devpts' filesystem. */ #define DEVPTS_SUPER_MAGIC 0x1cd1 -/* Constant that identifies the `efs' filesystem. */ +/* Constants that identifies the `efs' filesystem. */ #define EFS_SUPER_MAGIC 0x414A53 #define EFS_MAGIC 0x072959 @@ -74,6 +74,9 @@ /* Constant that identifies the `jfs' filesystem. */ #define JFS_SUPER_MAGIC 0x3153464a +/* Constant that identifies the `logfs´ filesystem. */ +#define LOGFS_MAGIC_U32 0xc97e8168u + /* Constants that identify the `minix2' filesystem. */ #define MINIX2_SUPER_MAGIC 0x2468 #define MINIX2_SUPER_MAGIC2 0x2478 @@ -82,62 +85,62 @@ #define MINIX_SUPER_MAGIC 0x137f #define MINIX_SUPER_MAGIC2 0x138F -/* Constants that identify the `msdos' filesystem. */ +/* Constant that identifies the `msdos' filesystem. */ #define MSDOS_SUPER_MAGIC 0x4d44 -/* Constants that identify the `ncp' filesystem. */ +/* Constant that identifies the `ncp' filesystem. */ #define NCP_SUPER_MAGIC 0x564c -/* Constants that identify the `nfs' filesystem. */ +/* Constant that identifies the `nfs' filesystem. */ #define NFS_SUPER_MAGIC 0x6969 -/* Constants that identify the `ntfs' filesystem. */ +/* Constant that identifies the `ntfs' filesystem. */ #define NTFS_SUPER_MAGIC 0x5346544e -/* Constants that identify the `proc' filesystem. */ +/* Constant that identifies the `proc' filesystem. */ #define PROC_SUPER_MAGIC 0x9fa0 /* Constant that identifies the `usbdevfs' filesystem. */ #define USBDEVFS_SUPER_MAGIC 0x9fa2 -/* Constants that identify the `qnx4' filesystem. 
*/ +/* Constant that identifies the `qnx4' filesystem. */ #define QNX4_SUPER_MAGIC 0x002f -/* Constants that identify the `reiser' filesystem. */ +/* Constant that identifies the `reiser' filesystem. */ #define REISERFS_SUPER_MAGIC 0x52654973 /* Constant that identifies the `romfs' filesystem. */ #define ROMFS_SUPER_MAGIC 0x7275 -/* Constants that identify the `smb' filesystem. */ +/* Constant that identifies the `shm' filesystem. */ +#define SHMFS_SUPER_MAGIC 0x01021994 + +/* Constant that identifies the `smb' filesystem. */ #define SMB_SUPER_MAGIC 0x517b +/* Constant that identifies the `sysfs´ filesystem. */ +#define SYSFS_MAGIC 0x62656572 + /* Constants that identify the `sysV' filesystem. */ #define SYSV2_SUPER_MAGIC 0x012ff7b6 #define SYSV4_SUPER_MAGIC 0x012ff7b5 -/* Constants that identify the `udf' filesystem. */ +/* Constant that identifies the `udf' filesystem. */ #define UDF_SUPER_MAGIC 0x15013346 -/* Constants that identify the `ufs' filesystem. */ +/* Constant that identify the `ufs' filesystem. */ #define UFS_MAGIC 0x00011954 #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ -/* Constants that identify the `xenix' filesystem. */ -#define XENIX_SUPER_MAGIC 0x012ff7b4 +/* Constant that identifies the `vxfs' filesystem. */ +#define VXFS_SUPER_MAGIC 0xa501fcf5 -/* Constant that identifies the `shm' filesystem. */ -#define SHMFS_SUPER_MAGIC 0x01021994 +/* Constant that identifies the `xenix' filesystem. */ +#define XENIX_SUPER_MAGIC 0x012ff7b4 -/* Constants that identify the `xfs' filesystem. */ +/* Constant that identifies the `xfs' filesystem. */ #define XFS_SUPER_MAGIC 0x58465342 -/* Constants that identify the `vxfs' filesystem. */ -#define VXFS_SUPER_MAGIC 0xa501fcf5 - -/* Constants that identify the `sysfs´ filesystem. */ -#define SYSFS_MAGIC 0x62656572 - /* Maximum link counts. 
*/ #define COH_LINK_MAX 10000 #define EXT2_LINK_MAX 32000 From b886abfbacc5799dd29d4068338a488b17964e51 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 7 Mar 2010 18:22:52 -0800 Subject: [PATCH 27/31] Fix one left over from last change to statvfs. --- ChangeLog | 5 +++++ sysdeps/unix/sysv/linux/internal_statvfs.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0b18684168..87c7717468 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-03-07 Ulrich Drepper + + * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags): + Little follow-up patch from last change. + 2010-03-06 Ulrich Drepper * sysdeps/unix/sysv/linux/internal_statvfs.c: Handle ext4 and logfs. diff --git a/sysdeps/unix/sysv/linux/internal_statvfs.c b/sysdeps/unix/sysv/linux/internal_statvfs.c index 9a6f4edac1..59b173ed73 100644 --- a/sysdeps/unix/sysv/linux/internal_statvfs.c +++ b/sysdeps/unix/sysv/linux/internal_statvfs.c @@ -183,7 +183,7 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) { /* Try without a filesystem name. */ assert (fsname != NULL); - fsname = fsname2 = NULL; + fsname = fsname2 = fsname3 = NULL; } /* It is not strictly allowed to use rewind here. But From 058e9ba9fd03b86246b8ed3c49392fa783a5c337 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 8 Mar 2010 17:01:14 +0100 Subject: [PATCH 28/31] Revert to original version of setxid race fix --- nptl/ChangeLog | 6 ++++++ nptl/allocatestack.c | 9 +++++++++ nptl/pthread_create.c | 3 --- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/nptl/ChangeLog b/nptl/ChangeLog index e0e1a5392f..fa4c8f2f9f 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,3 +1,9 @@ +2010-03-08 Andreas Schwab + + * pthread_create.c (__pthread_create_2_1): Don't set setxid_futex. + * allocatestack.c (get_cached_stack): Set setxid_futex. + (allocate_stack): Likewise. 
+ 2010-03-05 Andreas Schwab Ulrich Drepper diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index 899c0e8eee..831e98e4ce 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -213,6 +213,9 @@ get_cached_stack (size_t *sizep, void **memp) return NULL; } + /* Don't allow setxid until cloned. */ + result->setxid_futex = -1; + /* Dequeue the entry. */ stack_list_del (&result->list); @@ -418,6 +421,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* The process ID is also the same as that of the caller. */ pd->pid = THREAD_GETMEM (THREAD_SELF, pid); + /* Don't allow setxid until cloned. */ + pd->setxid_futex = -1; + /* Allocate the DTV for this thread. */ if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL) { @@ -554,6 +560,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO; #endif + /* Don't allow setxid until cloned. */ + pd->setxid_futex = -1; + /* The process ID is also the same as that of the caller. */ pd->pid = THREAD_GETMEM (THREAD_SELF, pid); diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index 194a8ba0ab..14e3cf784b 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -542,9 +542,6 @@ __pthread_create_2_1 (newthread, attr, start_routine, arg) } } - /* Don't allow setxid until cloned. */ - pd->setxid_futex = -1; - /* Pass the descriptor to the caller. */ *newthread = (pthread_t) pd; From d6d1c4c87c3f2e60d9aab570aefc3e63d0467f33 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Mar 2010 03:48:24 -0800 Subject: [PATCH 29/31] sparc64: Fix handling of R_SPARC_TLS_LE_* relocations. --- ChangeLog | 6 ++++++ sysdeps/sparc/sparc64/dl-machine.h | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 87c7717468..2963971d48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2010-03-09 David S. 
Miller + + * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_rela): Handling + of R_SPARC_TLS_LE_* needs to use 32-bit loads and stores, not + 64-bit ones. + 2010-03-07 Ulrich Drepper * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags): diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h index fcfbb06ac2..82ab5a4547 100644 --- a/sysdeps/sparc/sparc64/dl-machine.h +++ b/sysdeps/sparc/sparc64/dl-machine.h @@ -513,11 +513,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, value = sym->st_value - sym_map->l_tls_offset + reloc->r_addend; if (r_type == R_SPARC_TLS_LE_HIX22) - *reloc_addr = (*reloc_addr & 0xffc00000) - | (((~value) >> 10) & 0x3fffff); + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) + | (((~value) >> 10) & 0x3fffff)); else - *reloc_addr = (*reloc_addr & 0xffffe000) | (value & 0x3ff) - | 0x1c00; + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffffe000) | (value & 0x3ff) + | 0x1c00); } break; # endif From 2fe000dfd673859eb3b1e1c9739de66445d9fe08 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Mar 2010 06:37:45 -0800 Subject: [PATCH 30/31] sparc32: Fix non-v9 build failure in memcpy. --- ChangeLog | 2 ++ sysdeps/sparc/sparc32/memcpy.S | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2963971d48..afb549b93f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ of R_SPARC_TLS_LE_* needs to use 32-bit loads and stores, not 64-bit ones. + * sysdeps/sparc/sparc32/memcpy.S: Fix build. 
+ 2010-03-07 Ulrich Drepper * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags): diff --git a/sysdeps/sparc/sparc32/memcpy.S b/sysdeps/sparc/sparc32/memcpy.S index c9c7c40e81..748a0862fe 100644 --- a/sysdeps/sparc/sparc32/memcpy.S +++ b/sysdeps/sparc/sparc32/memcpy.S @@ -117,10 +117,27 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ bleu 90f andcc %o1, 3, %g0 - bne 78b -3: andcc %o1, 4, %g0 + be 78f + andcc %o1, 4, %g0 - be 2f + andcc %o1, 1, %g0 + be 4f + andcc %o1, 2, %g0 + + ldub [%o1], %g2 + add %o1, 1, %o1 + stb %g2, [%o0] + sub %o2, 1, %o2 + bne 77f + add %o0, 1, %o0 +4: lduh [%o1], %g2 + add %o1, 2, %o1 + sth %g2, [%o0] + sub %o2, 2, %o2 + add %o0, 2, %o0 + +77: andcc %o1, 4, %g0 +78: be 2f mov %o2, %g1 ld [%o1], %o4 From 462a5227b0d3220ab68f65272bd5b9d6d4f49b1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Mar 2010 06:42:53 -0800 Subject: [PATCH 31/31] sparc: Optimize strlen using techniques from powerpc implementation. --- ChangeLog | 5 + sysdeps/sparc/sparc32/sparcv9/strlen.S | 3 - sysdeps/sparc/sparc32/strlen.S | 128 ++++++--------- sysdeps/sparc/sparc64/strlen.S | 210 +++++++------------------ 4 files changed, 115 insertions(+), 231 deletions(-) diff --git a/ChangeLog b/ChangeLog index afb549b93f..7e5e7bb5b2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ * sysdeps/sparc/sparc32/memcpy.S: Fix build. + * sysdeps/sparc/sparc32/strlen.S: Optimize. + * sysdeps/sparc/sparc64/strlen.S: Likewise. + * sysdeps/sparc/sparc32/sparcv9/strlen.S (ASI_PNF, ASI_BLK_P, + XCC): Delete definitions, not needed. 
+ 2010-03-07 Ulrich Drepper * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags): diff --git a/sysdeps/sparc/sparc32/sparcv9/strlen.S b/sysdeps/sparc/sparc32/sparcv9/strlen.S index b8f4dba4f4..28a216c076 100644 --- a/sysdeps/sparc/sparc32/sparcv9/strlen.S +++ b/sysdeps/sparc/sparc32/sparcv9/strlen.S @@ -1,4 +1 @@ -#define ASI_PNF 0x82 -#define ASI_BLK_P 0xf0 -#define XCC icc #include diff --git a/sysdeps/sparc/sparc32/strlen.S b/sysdeps/sparc/sparc32/strlen.S index ed92f20e28..2945bb5484 100644 --- a/sysdeps/sparc/sparc32/strlen.S +++ b/sysdeps/sparc/sparc32/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v7. - Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jakub Jelinek . + Contributed by Jakub Jelinek and + David S. Miller . The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -21,86 +22,55 @@ #include - /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x01010101) & (~xword) & 0x80808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
- */ - .text .align 4 ENTRY(strlen) - mov %o0, %o1 - andcc %o0, 3, %g0 - be 20f - sethi %hi(0x80808080), %o4 - - ldub [%o0], %o5 - cmp %o5, 0 - be 21f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 4f - or %o4, %lo(0x80808080), %o3 - ldub [%o0], %o5 - cmp %o5, 0 - be 22f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 5f - sethi %hi(0x01010101), %o4 - ldub [%o0], %o5 - cmp %o5, 0 - be 23f - add %o0, 1, %o0 - b 11f - or %o4, %lo(0x01010101), %o2 -21: retl - mov 0, %o0 -22: retl - mov 1, %o0 -23: retl - mov 2, %o0 - -20: or %o4, %lo(0x80808080), %o3 -4: sethi %hi(0x01010101), %o4 -5: or %o4, %lo(0x01010101), %o2 -11: ld [%o0], %o5 -12: sub %o5, %o2, %o4 -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o5, %o4 -#endif - andcc %o4, %o3, %g0 - be 11b - add %o0, 4, %o0 - - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne,a 12b - ld [%o0], %o5 - add %o4, 1, %o4 -13: retl - sub %o4, %o1, %o0 + mov %o0, %o1 + andn %o0, 0x3, %o0 + + ld [%o0], %o5 + and %o1, 0x3, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srl %g5, %g1, %g2 + + orn %o5, %g2, %o5 + sll %o2, 7, %o3 +10: add %o0, 4, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a 10b + ld [%o0], %o5 + + srl %o5, 24, %g1 + + andcc %g1, 0xff, %g0 + be 90f + sub %o0, 4, %o0 + + srl %o5, 16, %g2 + + andcc %g2, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + srl %o5, 8, %g1 + + andcc %g1, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + add %o0, 1, %o0 + +90: retl + sub %o0, %o1, %o0 END(strlen) libc_hidden_builtin_def (strlen) diff --git a/sysdeps/sparc/sparc64/strlen.S b/sysdeps/sparc/sparc64/strlen.S index cc15e4e3fb..64350fb05e 100644 --- a/sysdeps/sparc/sparc64/strlen.S +++ b/sysdeps/sparc/sparc64/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v9. 
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jan Vondrak and - Jakub Jelinek . + Contributed by Jan Vondrak , + Jakub Jelinek , and + David S. Miller . The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,155 +21,66 @@ 02111-1307 USA. */ #include -#include - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
- */ + + .register %g2, #scratch + .register %g3, #scratch .text .align 32 ENTRY(strlen) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %o1 /* IEU1 */ - - sllx %g1, 32, %g4 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g4, %g1 /* IEU0 Group */ - brz,pn %o3, 13f /* CTI+IEU1 */ - - sllx %g1, 7, %g4 /* IEU0 Group */ - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ - /* %g1 = 0x0101010101010101 * - * %g4 = 0x8080808080808080 * - * %o0 = string pointer * - * %o1 = start of string */ -1: ldx [%o0], %o3 /* Load Group */ - - add %o0, 8, %o0 /* IEU1 */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %o5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ -#ifdef EIGHTBIT_NOT_RARE - srlx %o5, 32, %o5 /* IEU0 */ - -3: andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - srlx %o2, 32, %o2 /* IEU0 */ - -3: andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - be,pn %xcc, 4f /* CTI */ - srlx %g5, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %g5, 48, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - - srlx %g5, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g5, 32, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %g5, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %g5, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - - srlx %g5, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %g5, 0xff, %g0 
/* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ - ba,pt %xcc, 3b /* CTI */ - - srlx %o2, 32, %o2 /* IEU0 */ -5: add %o0, -9, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -6: add %o0, -10, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -7: add %o0, -11, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -8: add %o0, -12, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -9: add %o0, -13, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -10: add %o0, -14, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -11: add %o0, -15, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -12: add %o0, -16, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -13: retl /* CTI+IEU1 Group */ - mov 0, %o0 /* IEU0 */ - nop - -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,pn %icc, 1b /* CTI */ - nop /* IEU0 Group */ - - add %o0, 1, %o0 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,a,pt %icc, 16b /* CTI */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - - add %o0, -1, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ + mov %o0, %o1 + andn %o0, 0x7, %o0 + + ldx [%o0], %o5 + and %o1, 0x7, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srlx %g5, %g1, %o3 + + sllx %o2, 32, %g1 + sethi %hi(0x0000ff00), %g5 + + orn %o5, %o3, %o5 + or %o2, %g1, %o2 + + sllx %o2, 7, %o3 +10: add %o0, 8, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a,pt %xcc, 10b + ldx [%o0], %o5 + srlx %o5, 32, %g1 + + andn %o3, %g1, %o4 + sub %g1, %o2, %g2 + + add 
%o0, 4, %g3 + andcc %o4, %g2, %g0 + movne %icc, %g1, %o5 + + move %icc, %g3, %o0 + or %g5, %lo(0x0000ff00), %g5 + mov 3 - 8, %g2 + + andcc %o5, %g5, %g0 + srlx %o5, 16, %g1 + move %icc, 2 - 8, %g2 + + andcc %g1, 0xff, %g0 + srl %o5, 24, %o5 + move %icc, 1 - 8, %g2 + + movrz %o5, 0 - 8, %g2 + sub %o0, %o1, %o0 + + retl + add %o0, %g2, %o0 END(strlen) libc_hidden_builtin_def (strlen)