Skip to content

Commit

Permalink
powerpc: Optimized st{r,p}ncpy for POWER8/PPC64
Browse files Browse the repository at this point in the history
This patch adds an optimized POWER8 st{r,p}ncpy using unaligned accesses.
It shows a 10%-80% improvement over the optimized POWER7 one, which uses
only aligned accesses, especially on unaligned inputs.

The algorithm first reads and checks 16 bytes (if the inputs do not cross a 4K
page boundary).  Then it realigns the source to 16 bytes and issues a 16-byte
read and compare loop to speed up null byte checks for large strings.  Also,
unlike the POWER7 optimization, the null padding is done inline in the
implementation using possibly unaligned accesses, instead of relying on
a memset call.  A special case is added for page-crossing reads.
  • Loading branch information
Adhemerval Zanella committed Jan 13, 2015
1 parent 9f2f36e commit f06a4fa
Show file tree
Hide file tree
Showing 10 changed files with 559 additions and 7 deletions.
15 changes: 15 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
2015-01-13 Adhemerval Zanella <azanella@linux.vnet.ibm.com>

* sysdeps/powerpc/powerpc64/multiarch/Makefile [sysdep_routines]:
Add strncpy-power8 and stpncpy-power8 objects.
* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add __strncpy_power8 and __stpncpy_power8
implementations.
* sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S: New file.
* sysdeps/powerpc/powerpc64/multiarch/stpncpy.c (__stpncpy): Add
__stpncpy_power8 implementation.
* sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S: New file.
* sysdeps/powerpc/powerpc64/multiarch/strncpy.c (strncpy): Add
__strncpy_power8 implementation.
* sysdeps/powerpc/powerpc64/power8/stpncpy.S: New file.
* sysdeps/powerpc/powerpc64/power8/strncpy.S: New file.
* NEWS: Update.

* sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c: New file.
* sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S: Remove file.
* sysdeps/powerpc/powerpc64/power7/strncat.S: Likewise.
Expand Down
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ Version 2.21
17744, 17745, 17746, 17747, 17748, 17775, 17777, 17780, 17781, 17782,
17791, 17793, 17796, 17797, 17803, 17806, 17834

* Optimized strcpy and stpcpy implementations for powerpc64/powerpc64le.
* Optimized strcpy, stpcpy, strncpy, stpncpy implementations for
powerpc64/powerpc64le.

* Added support for TSX lock elision of pthread mutexes on powerpc32, powerpc64
and powerpc64le. This may improve lock scaling of existing programs on
Expand Down
5 changes: 3 additions & 2 deletions sysdeps/powerpc/powerpc64/multiarch/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
stpcpy-power7 stpcpy-ppc64 \
strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
strncpy-power7 strncpy-ppc64 \
stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \
stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \
strcmp-power7 strcmp-ppc64 \
strcat-power8 strcat-power7 strcat-ppc64 memmove-power7 \
memmove-ppc64 bcopy-ppc64
memmove-ppc64 bcopy-ppc64 strncpy-power8

CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
Expand Down
6 changes: 6 additions & 0 deletions sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,

/* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
IFUNC_IMPL_ADD (array, i, strncpy,
hwcap2 & PPC_FEATURE2_ARCH_2_07,
__strncpy_power8)
IFUNC_IMPL_ADD (array, i, strncpy,
hwcap & PPC_FEATURE_HAS_VSX,
__strncpy_power7)
Expand All @@ -286,6 +289,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,

/* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c. */
IFUNC_IMPL (i, name, stpncpy,
IFUNC_IMPL_ADD (array, i, stpncpy,
hwcap2 & PPC_FEATURE2_ARCH_2_07,
__stpncpy_power8)
IFUNC_IMPL_ADD (array, i, stpncpy,
hwcap & PPC_FEATURE_HAS_VSX,
__stpncpy_power7)
Expand Down
39 changes: 39 additions & 0 deletions sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* Optimized stpncpy implementation for POWER8.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */

#include <sysdep.h>

#define USE_AS_STPNCPY

/* Replace the EALIGN definition in effect at this point (this file includes
   <sysdep.h> above) with one that always emits the function entry
   under the fixed name __stpncpy_power8.  The macro's `name' argument is
   deliberately ignored; `alignt' selects the .align value and `words'
   selects which EALIGN_W_<words> padding macro is expanded.
   NOTE(review): presumably the implementation included at the end of this
   file opens its function with EALIGN -- confirm against the included
   source.  */
#undef EALIGN
#define EALIGN(name, alignt, words) \
.section ".text"; \
ENTRY_2(__stpncpy_power8) \
.align ALIGNARG(alignt); \
EALIGN_W_##words; \
BODY_LABEL(__stpncpy_power8): \
cfi_startproc; \
LOCALENTRY(__stpncpy_power8)

/* Counterpart of the EALIGN override: whatever `name' is passed, expand to
   cfi_endproc followed by TRACEBACK and END_2 for the fixed symbol
   __stpncpy_power8.  */
#undef END
#define END(name) \
cfi_endproc; \
TRACEBACK(__stpncpy_power8) \
END_2(__stpncpy_power8)

#include <sysdeps/powerpc/powerpc64/power8/stpncpy.S>
7 changes: 5 additions & 2 deletions sysdeps/powerpc/powerpc64/multiarch/stpncpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@

extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden;
extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden;
extern __typeof (__stpncpy) __stpncpy_power8 attribute_hidden;

libc_ifunc (__stpncpy,
(hwcap & PPC_FEATURE_HAS_VSX)
? __stpncpy_power7
(hwcap2 & PPC_FEATURE2_ARCH_2_07)
? __stpncpy_power8 :
(hwcap & PPC_FEATURE_HAS_VSX)
? __stpncpy_power7
: __stpncpy_ppc);

weak_alias (__stpncpy, stpncpy)
Expand Down
40 changes: 40 additions & 0 deletions sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* Optimized strncpy implementation for POWER8.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */

#include <sysdep.h>

/* Replace the EALIGN definition in effect at this point (this file includes
   <sysdep.h> above) with one that always emits the function entry
   under the fixed name __strncpy_power8.  The macro's `name' argument is
   deliberately ignored; `alignt' selects the .align value and `words'
   selects which EALIGN_W_<words> padding macro is expanded.
   NOTE(review): presumably the implementation included at the end of this
   file opens its function with EALIGN -- confirm against the included
   source.  */
#undef EALIGN
#define EALIGN(name, alignt, words) \
.section ".text"; \
ENTRY_2(__strncpy_power8) \
.align ALIGNARG(alignt); \
EALIGN_W_##words; \
BODY_LABEL(__strncpy_power8): \
cfi_startproc; \
LOCALENTRY(__strncpy_power8)

/* Counterpart of the EALIGN override: whatever `name' is passed, expand to
   cfi_endproc followed by TRACEBACK and END_2 for the fixed symbol
   __strncpy_power8.  */
#undef END
#define END(name) \
cfi_endproc; \
TRACEBACK(__strncpy_power8) \
END_2(__strncpy_power8)

/* Redefine libc_hidden_builtin_def to expand to nothing, so any use of it
   in the implementation included below emits no code or symbols here.
   NOTE(review): presumably this keeps the multiarch variant from defining
   the hidden alias for the generic name -- confirm against the included
   source.  */
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(name)

#include <sysdeps/powerpc/powerpc64/power8/strncpy.S>
7 changes: 5 additions & 2 deletions sysdeps/powerpc/powerpc64/multiarch/strncpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@

extern __typeof (strncpy) __strncpy_ppc attribute_hidden;
extern __typeof (strncpy) __strncpy_power7 attribute_hidden;
extern __typeof (strncpy) __strncpy_power8 attribute_hidden;

/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
libc_ifunc (strncpy,
(hwcap & PPC_FEATURE_HAS_VSX)
? __strncpy_power7
(hwcap2 & PPC_FEATURE2_ARCH_2_07)
? __strncpy_power8 :
(hwcap & PPC_FEATURE_HAS_VSX)
? __strncpy_power7
: __strncpy_ppc);

#endif
20 changes: 20 additions & 0 deletions sysdeps/powerpc/powerpc64/power8/stpncpy.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* Optimized stpncpy implementation for PowerPC64/POWER8.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */

#define USE_AS_STPNCPY
#include <sysdeps/powerpc/powerpc64/power8/strncpy.S>
Loading

0 comments on commit f06a4fa

Please sign in to comment.