-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This patch provides optimized versions of strcpy and wcscpy with the z13 vector instructions. ChangeLog: * sysdeps/s390/multiarch/strcpy-vx.S: New File. * sysdeps/s390/multiarch/strcpy.c: Likewise. * sysdeps/s390/multiarch/wcscpy-c.c: Likewise. * sysdeps/s390/multiarch/wcscpy-vx.S: Likewise. * sysdeps/s390/multiarch/wcscpy.c: Likewise. * sysdeps/s390/s390-32/multiarch/strcpy.c: Likewise. * sysdeps/s390/s390-64/multiarch/strcpy.c: Likewise. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add strcpy and wcscpy functions. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add ifunc test for strcpy, wcscpy. * benchtests/bench-wcscpy.c: New File. * benchtests/Makefile (wcsmbs-bench): Add wcscpy.
- Loading branch information
Stefan Liebler
authored and
Andreas Krebbel
committed
Aug 26, 2015
1 parent
fcf40eb
commit 680df12
Showing
12 changed files
with
382 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* Measure wcscpy functions. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#define WIDE 1 /* NOTE(review): presumably switches bench-strcpy.c to its wide-character (wcscpy) variant - confirm in bench-strcpy.c. */ | ||
#include "bench-strcpy.c" /* The benchmark body is shared with the strcpy benchmark. */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
ifeq ($(subdir),string) | ||
sysdep_routines += strlen strlen-vx strlen-c \ | ||
strnlen strnlen-vx strnlen-c | ||
strnlen strnlen-vx strnlen-c \ | ||
strcpy strcpy-vx | ||
endif | ||
|
||
ifeq ($(subdir),wcsmbs) | ||
sysdep_routines += wcslen wcslen-vx wcslen-c \ | ||
wcsnlen wcsnlen-vx wcsnlen-c | ||
wcsnlen wcsnlen-vx wcsnlen-c \ | ||
wcscpy wcscpy-vx wcscpy-c | ||
endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/* Vector optimized 32/64 bit S/390 version of strcpy. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) | ||
|
||
# include "sysdep.h" | ||
# include "asm-syntax.h" | ||
|
||
.text | ||
|
||
/* char * strcpy (char *dest, const char *src) | ||
Copy string src, including its terminating zero byte, to dest. | ||
%r2 is never written, so dest is returned unchanged. | ||
The first chunk is loaded without crossing a 4k-byte boundary; | ||
afterwards src is read in 16-byte aligned chunks, four per loop | ||
iteration. %r3 is overwritten on the exit paths out of the loop. | ||
Register usage: | ||
-r1=tmp (number of bytes loaded by the initial vlbb) | ||
-r2=dest and return_value | ||
-r3=src (reused as store address once the zero was found) | ||
-r4=tmp (src 'and' 15; later byte index of zero) | ||
-r5=current_len (byte offset into src and dest) | ||
-v16=part of src | ||
-v17=index of zero | ||
-v18=part of src | ||
*/ | ||
ENTRY(__strcpy_vx) | ||
.machine "z13" /* Allow z13 (vector) instructions in this function. */ | ||
.machinemode "zarch_nohighgprs" | ||
|
||
vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ | ||
lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ | ||
|
||
vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. Both operands are the same register, so only the zero search can hit. */ | ||
vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ | ||
clrjl %r5,%r1,.Lfound_align /* If a zero was found within the loaded bytes, | ||
copy the bytes up to and including it and return. */ | ||
|
||
/* Align s to 16 byte. */ | ||
risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3, i.e. src 'and' 15. */ | ||
lghi %r5,15 /* current_len = 15. */ | ||
slr %r5,%r4 /* Compute highest index to 16byte boundary. */ | ||
|
||
vstl %v16,%r5,0(%r2) /* Copy loaded characters (indices 0..%r5) - no zero. */ | ||
ahi %r5,1 /* Start loop at next character; src + current_len is now 16-byte aligned. */ | ||
|
||
/* Find zero in 16byte aligned loop. Each chunk is stored as a whole | ||
only after it was checked to contain no zero; the chunk holding the | ||
zero is stored by the .Lfound_* code below. */ | ||
.Lloop: | ||
vl %v16,0(%r5,%r3) /* Load s. */ | ||
vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ | ||
je .Lfound_v16_0 /* Jump away if zero was found. */ | ||
vl %v18,16(%r5,%r3)/* Load next part of s. */ | ||
vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ | ||
vfenezbs %v17,%v18,%v18 /* Search zero in second 16 bytes. */ | ||
je .Lfound_v18_16 /* Zero found in %v18 at offset 16. */ | ||
vl %v16,32(%r5,%r3) /* Load third 16 bytes of s. */ | ||
vst %v18,16(%r5,%r2) /* Store second part without zero to dst. */ | ||
vfenezbs %v17,%v16,%v16 /* Search zero in third 16 bytes. */ | ||
je .Lfound_v16_32 /* Zero found in %v16 at offset 32. */ | ||
vl %v18,48(%r5,%r3) /* Load fourth 16 bytes of s. */ | ||
vst %v16,32(%r5,%r2) /* Store third part without zero to dst. */ | ||
vfenezbs %v17,%v18,%v18 /* Search zero in fourth 16 bytes. */ | ||
je .Lfound_v18_48 /* Zero found in %v18 at offset 48. */ | ||
vst %v18,48(%r5,%r2) /* Store fourth part without zero to dst. */ | ||
|
||
aghi %r5,64 /* current_len += 64 bytes handled in this iteration. */ | ||
j .Lloop /* No zero found -> loop. */ | ||
|
||
.Lfound_v16_32: | ||
aghi %r5,32 /* %v16 belongs to offset current_len + 32. */ | ||
.Lfound_v16_0: | ||
la %r3,0(%r5,%r2) /* %r3 = dest + current_len (src is no longer needed). */ | ||
vlgvb %r4,%v17,7 /* Load byte index of zero. */ | ||
vstl %v16,%r4,0(%r3) /* Store characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
|
||
.Lfound_v18_48: | ||
aghi %r5,32 /* With the displacement 16 below this addresses offset current_len + 48. */ | ||
.Lfound_v18_16: | ||
la %r3,16(%r5,%r2) /* %r3 = dest + current_len + 16 (src is no longer needed). */ | ||
vlgvb %r4,%v17,7 /* Load byte index of zero. */ | ||
vstl %v18,%r4,0(%r3) /* Store characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
|
||
.Lfound_align: | ||
vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
END(__strcpy_vx) | ||
|
||
/* Use mvst-strcpy-implementation as default implementation. | ||
The define below renames strcpy to __strcpy_c for the file included | ||
at the end (assuming that file names its entry point strcpy), and | ||
libc_hidden_builtin_def is overridden so that __GI_strcpy becomes a | ||
strong alias of __strcpy_c regardless of the name passed in. */ | ||
# define strcpy __strcpy_c | ||
# undef libc_hidden_builtin_def | ||
# define libc_hidden_builtin_def(name) strong_alias(__strcpy_c, __GI_strcpy) | ||
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ | ||
|
||
/* Include mvst-strcpy-implementation in s390-32/s390-64 subdirectory. | ||
This include sits outside the #if, so without vector support the | ||
file is built under its original name strcpy. */ | ||
#include <strcpy.S> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/* Multiple versions of strcpy. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) | ||
# include <string.h> | ||
# include <ifunc-resolve.h> | ||
|
||
s390_vx_libc_ifunc2 (__strcpy, strcpy) /* NOTE(review): presumably defines the IFUNC resolver for strcpy that selects __strcpy_vx or __strcpy_c; the macro comes from ifunc-resolve.h - confirm there. */ | ||
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* Default wcscpy implementation for S/390. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) | ||
# define WCSCPY __wcscpy_c /* Name for the generic implementation included below (assumes wcsmbs/wcscpy.c defines its function as WCSCPY - confirm). */ | ||
|
||
# include <wchar.h> | ||
extern __typeof (wcscpy) __wcscpy_c; /* Declare __wcscpy_c with the same type as wcscpy. */ | ||
# include <wcsmbs/wcscpy.c> /* Generic C implementation, built only when the vector variant is available. */ | ||
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* Vector optimized 32/64 bit S/390 version of wcscpy. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) | ||
|
||
# include "sysdep.h" | ||
# include "asm-syntax.h" | ||
|
||
.text | ||
|
||
/* wchar_t * wcscpy (wchar_t *dest, const wchar_t *src) | ||
Copy wide string src, including its terminating zero, to dest. | ||
%r2 is never written, so dest is returned unchanged. | ||
If src is not 4-byte aligned, control is passed to __wcscpy_c. | ||
Register usage: | ||
-r0=not used by this implementation | ||
-r1=tmp (bytes loaded by the initial vlbb; later byte index of zero) | ||
-r2=dest and return value | ||
-r3=src (reused as store address once the zero was found) | ||
-r4=tmp (src 'and' 15) | ||
-r5=current_len (byte offset into src and dest) | ||
-v16=part of src | ||
-v17=index of zero | ||
-v18=part of src | ||
*/ | ||
ENTRY(__wcscpy_vx) | ||
.machine "z13" /* Allow z13 (vector) instructions in this function. */ | ||
.machinemode "zarch_nohighgprs" | ||
|
||
vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ | ||
lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ | ||
|
||
tmll %r3,3 /* Test if s is 4-byte aligned? */ | ||
jne .Lfallback /* And use common-code variant if not. */ | ||
|
||
vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. Both operands are the same register, so only the zero search can hit. */ | ||
vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ | ||
clrjl %r5,%r1,.Lfound_align /* If a zero was found within the loaded bytes, | ||
copy the characters up to and including it and return. */ | ||
|
||
/* Align s to 16 byte. */ | ||
risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3, i.e. src 'and' 15. */ | ||
lghi %r5,15 /* current_len = 15. */ | ||
slr %r5,%r4 /* Compute highest index to 16byte boundary. */ | ||
|
||
vstl %v16,%r5,0(%r2) /* Copy loaded characters (byte indices 0..%r5) - no zero. */ | ||
ahi %r5,1 /* Start loop at next character; src + current_len is now 16-byte aligned. */ | ||
|
||
/* Find zero in 16byte aligned loop. Each chunk is stored as a whole | ||
only after it was checked to contain no zero; the chunk holding the | ||
zero is stored by the .Lfound_* code below. */ | ||
.Lloop: | ||
vl %v16,0(%r5,%r3) /* Load s. */ | ||
vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ | ||
je .Lfound_v16_0 /* Jump away if zero was found. */ | ||
vl %v18,16(%r5,%r3) /* Load next part of s. */ | ||
vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ | ||
vfenezfs %v17,%v18,%v18 /* Search zero in second 16 bytes. */ | ||
je .Lfound_v18_16 /* Zero found in %v18 at offset 16. */ | ||
vl %v16,32(%r5,%r3) /* Load third 16 bytes of s. */ | ||
vst %v18,16(%r5,%r2) /* Store second part without zero to dst. */ | ||
vfenezfs %v17,%v16,%v16 /* Search zero in third 16 bytes. */ | ||
je .Lfound_v16_32 /* Zero found in %v16 at offset 32. */ | ||
vl %v18,48(%r5,%r3) /* Load fourth 16 bytes of s. */ | ||
vst %v16,32(%r5,%r2) /* Store third part without zero to dst. */ | ||
vfenezfs %v17,%v18,%v18 /* Search zero in fourth 16 bytes. */ | ||
je .Lfound_v18_48 /* Zero found in %v18 at offset 48. */ | ||
vst %v18,48(%r5,%r2) /* Store fourth part without zero to dst. */ | ||
|
||
aghi %r5,64 /* current_len += 64 bytes handled in this iteration. */ | ||
j .Lloop /* No zero found -> loop. */ | ||
|
||
.Lfound_v16_32: | ||
aghi %r5,32 /* %v16 belongs to offset current_len + 32. */ | ||
.Lfound_v16_0: | ||
la %r3,0(%r5,%r2) /* %r3 = dest + current_len (src is no longer needed). */ | ||
vlgvb %r1,%v17,7 /* Load byte index of zero. */ | ||
aghi %r1,3 /* Also copy remaining bytes of zero. */ | ||
vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
|
||
.Lfound_v18_48: | ||
aghi %r5,32 /* With the displacement 16 below this addresses offset current_len + 48. */ | ||
.Lfound_v18_16: | ||
la %r3,16(%r5,%r2) /* %r3 = dest + current_len + 16 (src is no longer needed). */ | ||
vlgvb %r1,%v17,7 /* Load byte index of zero. */ | ||
aghi %r1,3 /* Also copy remaining bytes of zero. */ | ||
vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
|
||
.Lfound_align: | ||
aghi %r5,3 /* Also copy remaining bytes of zero. */ | ||
vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ | ||
br %r14 /* Return; %r2 still holds dest. */ | ||
|
||
.Lfallback: | ||
jg __wcscpy_c /* Jump (not call) to __wcscpy_c; %r2, %r3 and %r14 are still as on entry. */ | ||
END(__wcscpy_vx) | ||
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* Multiple versions of wcscpy. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) | ||
# include <wchar.h> | ||
# include <ifunc-resolve.h> | ||
|
||
s390_vx_libc_ifunc2 (__wcscpy, wcscpy) /* NOTE(review): presumably defines the IFUNC resolver for wcscpy that selects __wcscpy_vx or __wcscpy_c; the macro comes from ifunc-resolve.h - confirm there. */ | ||
|
||
#else /* No vector support or not in libc: build only the generic C implementation. */ | ||
# include <wcsmbs/wcscpy.c> | ||
#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/* Multiple versions of strcpy. | ||
Copyright (C) 2015 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library; if not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
/* This wrapper file is needed because otherwise the file | ||
sysdeps/s390/s390-[32|64]/strcpy.S would be used instead of the | ||
multiarch file included below. */ | ||
#include <sysdeps/s390/multiarch/strcpy.c> |
Oops, something went wrong.