Improve 64bit memcpy/memmove for Atom, Core 2 and Core i7
This patch includes optimized 64bit memcpy/memmove for Atom, Core 2 and
Core i7.  It improves memcpy by up to 3X on Atom, up to 4X on Core 2 and
up to 1X on Core i7.  It also improves memmove by up to 3X on Atom, up to
4X on Core 2 and up to 2X on Core i7.
H.J. Lu authored and Ulrich Drepper committed Jun 30, 2010
1 parent d85f8ff commit 6fb8cbc
Showing 21 changed files with 6,681 additions and 10 deletions.
32 changes: 32 additions & 0 deletions ChangeLog
@@ -1,3 +1,35 @@
+2010-06-25  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* debug/memmove_chk.c (__memmove_chk): Renamed to ...
+	(MEMMOVE_CHK): ...this. Default to __memmove_chk.
+	* string/memmove.c (memmove): Renamed to ...
+	(MEMMOVE): ...this. Default to memmove.
+	* sysdeps/x86_64/memcpy.S: Use ENTRY_CHK and END_CHK.
+	* sysdeps/x86_64/sysdep.h (ENTRY_CHK): Define.
+	(END_CHK): Define.
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	memcpy-ssse3 mempcpy-ssse3 memmove-ssse3 memcpy-ssse3-back
+	mempcpy-ssse3-back memmove-ssse3-back.
+	* sysdeps/x86_64/multiarch/bcopy.S: New file.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: New file.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: New file.
+	* sysdeps/x86_64/multiarch/memcpy.S: New file.
+	* sysdeps/x86_64/multiarch/memcpy_chk.S: New file.
+	* sysdeps/x86_64/multiarch/memmove-ssse3-back.S: New file.
+	* sysdeps/x86_64/multiarch/memmove-ssse3.S: New file.
+	* sysdeps/x86_64/multiarch/memmove.c: New file.
+	* sysdeps/x86_64/multiarch/memmove_chk.c: New file.
+	* sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S: New file.
+	* sysdeps/x86_64/multiarch/mempcpy-ssse3.S: New file.
+	* sysdeps/x86_64/multiarch/mempcpy.S: New file.
+	* sysdeps/x86_64/multiarch/mempcpy_chk.S: New file.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_Fast_Copy_Backward):
+	Define.
+	(index_Fast_Copy_Backward): Define.
+	(HAS_ARCH_FEATURE): Define.
+	(HAS_FAST_REP_STRING): Define.
+	(HAS_FAST_COPY_BACKWARD): Define.
+
 2010-06-21  Andreas Schwab  <schwab@redhat.com>
 
 	* sysdeps/unix/sysv/linux/getlogin_r.c (__getlogin_r_loginuid):
6 changes: 5 additions & 1 deletion debug/memmove_chk.c
@@ -23,8 +23,12 @@
 #include <memcopy.h>
 #include <pagecopy.h>
 
+#ifndef MEMMOVE_CHK
+# define MEMMOVE_CHK __memmove_chk
+#endif
+
 void *
-__memmove_chk (dest, src, len, destlen)
+MEMMOVE_CHK (dest, src, len, destlen)
      void *dest;
      const void *src;
      size_t len;
5 changes: 4 additions & 1 deletion string/memmove.c
@@ -37,9 +37,12 @@
 #define rettype void *
 #endif
 
+#ifndef MEMMOVE
+#define MEMMOVE memmove
+#endif
+
 rettype
-memmove (a1, a2, len)
+MEMMOVE (a1, a2, len)
      a1const void *a1;
      a2const void *a2;
      size_t len;
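Both hunks above follow the same pattern: the generic C code keeps building as __memmove_chk/memmove by default, but a multiarch variant can now #define MEMMOVE_CHK or MEMMOVE before including the file to emit the same code under a private symbol. A minimal sketch of that reuse; the symbol name __memmove_sse2 and the include path are illustrative assumptions, not text from this commit:

/* Sketch only: build the unchanged generic memmove under a private name
   so a dispatcher can choose between it and the SSSE3 variants.  */
#define MEMMOVE __memmove_sse2      /* rename the generic entry point */
#include "string/memmove.c"         /* reuse the generic implementation */

The new sysdeps/x86_64/multiarch/memmove.c and memmove_chk.c listed in the ChangeLog are the presumable consumers of this hook.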
4 changes: 2 additions & 2 deletions sysdeps/x86_64/memcpy.S
@@ -40,12 +40,12 @@
 	.text
 
 #if defined PIC && !defined NOT_IN_libc
-ENTRY (__memcpy_chk)
+ENTRY_CHK (__memcpy_chk)
 
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 
-END (__memcpy_chk)
+END_CHK (__memcpy_chk)
 #endif
 
 ENTRY(memcpy)			/* (void *, const void*, size_t) */
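The matching sysdeps/x86_64/sysdep.h hunk mentioned in the ChangeLog is not shown on this page. A guess at the new macros, not copied from the commit: by default they likely just forward to ENTRY/END so the non-multiarch build is unchanged, while the multiarch files can redefine them to rename or drop the inline __memcpy_chk entry once __memcpy_chk itself is dispatched.

/* Guessed defaults for the macros the ChangeLog adds to sysdep.h;
   treat these as an assumption, not the actual hunk.  */
#define ENTRY_CHK(name)	ENTRY (name)
#define END_CHK(name)	END (name)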
4 changes: 3 additions & 1 deletion sysdeps/x86_64/multiarch/Makefile
@@ -5,7 +5,9 @@ endif
 
 ifeq ($(subdir),string)
 sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
-		   strend-sse4 memcmp-sse4
+		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
+		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
+		   memmove-ssse3-back
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4
7 changes: 7 additions & 0 deletions sysdeps/x86_64/multiarch/bcopy.S
@@ -0,0 +1,7 @@
+#include <sysdep.h>
+
+	.text
+ENTRY(bcopy)
+	xchg	%rdi, %rsi
+	jmp	HIDDEN_BUILTIN_JUMPTARGET(memmove)
+END(bcopy)
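The entire new bcopy.S is an argument swap: bcopy takes (src, dest, n) while memmove takes (dest, src, n), so exchanging %rdi and %rsi and tail-jumping into memmove lets bcopy inherit whichever memmove implementation the multiarch machinery selects. The same thing in C, purely for illustration:

#include <string.h>	/* memmove */

/* C rendering of the assembly above: swap the first two arguments,
   then defer to memmove.  */
void
bcopy (const void *src, void *dest, size_t n)
{
  memmove (dest, src, n);
}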
9 changes: 6 additions & 3 deletions sysdeps/x86_64/multiarch/init-arch.c
@@ -78,10 +78,13 @@ __init_cpu_features (void)
 	case 0x25:
 	case 0x2e:
 	case 0x2f:
-	  /* Rep string instructions are fast on Intel Core i3, i5
-	     and i7.  */
+	  /* Rep string instructions and copy backward are fast on
+	     Intel Core i3, i5 and i7.  */
+#if index_Fast_Rep_String != index_Fast_Copy_Backward
+# error index_Fast_Rep_String != index_Fast_Copy_Backward
+#endif
 	  __cpu_features.feature[index_Fast_Rep_String]
-	    |= bit_Fast_Rep_String;
+	    |= bit_Fast_Rep_String | bit_Fast_Copy_Backward;
 	  break;
 	}
     }
16 changes: 14 additions & 2 deletions sysdeps/x86_64/multiarch/init-arch.h
@@ -17,6 +17,7 @@
    02111-1307 USA.  */
 
 #define bit_Fast_Rep_String	(1 << 0)
+#define bit_Fast_Copy_Backward	(1 << 1)
 
 #ifdef	__ASSEMBLER__
 
@@ -32,7 +33,8 @@
 # define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 
-#define index_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -102,6 +104,16 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
 
-# define index_Fast_Rep_String	FEATURE_INDEX_1
+# define index_Fast_Rep_String	FEATURE_INDEX_1
+# define index_Fast_Copy_Backward	FEATURE_INDEX_1
+
+#define HAS_ARCH_FEATURE(idx, bit) \
+  ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+
+#define HAS_FAST_REP_STRING \
+  HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+
+#define HAS_FAST_COPY_BACKWARD \
+  HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
 
 #endif	/* __ASSEMBLER__ */
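HAS_ARCH_FEATURE is the generic accessor; HAS_FAST_REP_STRING and HAS_FAST_COPY_BACKWARD are the tuning bits the new memcpy/memmove code cares about. A sketch of how a resolver such as the new multiarch memmove.c is likely to consume them, assuming the existing HAS_SSSE3 macro from init-arch.h; the __memmove_* symbol names are inferred from the new file names and the resolver body is illustrative, not the commit's code:

#include <string.h>
#include "init-arch.h"

extern __typeof (memmove) __memmove_sse2;
extern __typeof (memmove) __memmove_ssse3;
extern __typeof (memmove) __memmove_ssse3_back;

/* Pick the best variant for the running CPU: generic SSE2, forward-copy
   SSSE3, or the SSSE3 version tuned for fast backward copies.  */
static __typeof (memmove) *
select_memmove (void)
{
  if (!HAS_SSSE3)
    return __memmove_sse2;
  return HAS_FAST_COPY_BACKWARD ? __memmove_ssse3_back : __memmove_ssse3;
}

Wiring this choice into an actual IFUNC is left to glibc's usual multiarch plumbing; the point here is only which feature bits gate which implementation.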