Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
This implementation of strlen is faster than the armv6 version for all string lengths greater than 1 on a Cortex-A15. ports/ChangeLog.arm: 2013-08-09 Will Newton <will.newton@linaro.org> * sysdeps/arm/armv6t2/strlen.S: New file.
- Loading branch information
Will Newton
committed
Aug 30, 2013
1 parent
0186c6e
commit 2601bc1
Showing
2 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/* Copyright (C) 2010-2011,2013 Free Software Foundation, Inc. | ||
This file is part of the GNU C Library. | ||
The GNU C Library is free software; you can redistribute it and/or | ||
modify it under the terms of the GNU Lesser General Public | ||
License as published by the Free Software Foundation; either | ||
version 2.1 of the License, or (at your option) any later version. | ||
The GNU C Library is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
Lesser General Public License for more details. | ||
You should have received a copy of the GNU Lesser General Public | ||
License along with the GNU C Library. If not, see | ||
<http://www.gnu.org/licenses/>. */ | ||
|
||
/* | ||
Assumes: | ||
ARMv6T2, AArch32 | ||
*/ | ||
|
||
#include <sysdep.h> | ||
|
||
/* Endian-neutral shift mnemonics, chosen by the preprocessor.
   S2LO shifts data toward the byte lane that holds the lowest-addressed
   byte of a loaded word, S2HI shifts it away from that lane.  When
   __ARMEB__ is defined (big-endian build) the lowest-addressed byte is
   the most significant one, so S2LO is lsl and S2HI is lsr; otherwise
   the two are swapped.  Only S2HI is used in the code visible in this
   file.  */
#ifdef __ARMEB__ | ||
#define S2LO lsl | ||
#define S2HI lsr | ||
#else | ||
#define S2LO lsr | ||
#define S2HI lsl | ||
#endif | ||
|
||
/* This code requires Thumb. */ | ||
/* The routine in this file uses cbnz and orn, which can only be encoded
   in Thumb state.  .thumb makes the assembler emit Thumb code, and
   .syntax unified selects the unified assembler syntax in which the
   instructions below (for example the itt blocks) are written.  */
.thumb | ||
.syntax unified | ||
|
||
/* Parameters and result. */ | ||
/* srcin and result are both r0: the incoming string address is replaced
   by the running length once src has been derived from it.  */
#define srcin r0 | ||
#define result r0 | ||
|
||
/* Internal variables. */ | ||
/* src      - read pointer, always a multiple of 8.
   data1a   - first word of the doubleword loaded by ldrd, then its
              zero-byte mask.
   data1b   - second word of the doubleword, then the combined mask.
   const_m1 - the constant 0xffffffff.
   const_0  - the constant 0.
   tmp1     - srcin & 7; shares r4 with const_0, so it is only valid
              until const_0 is written.
   tmp2     - scratch for the misaligned-start byte mask.
   r4 and r5 are stored on the stack by strlen before they are used and
   reloaded before it returns.  */
#define src r1 | ||
#define data1a r2 | ||
#define data1b r3 | ||
#define const_m1 r12 | ||
#define const_0 r4 | ||
#define tmp1 r4 /* Overlaps const_0 */ | ||
#define tmp2 r5 | ||
|
||
/* strlen
   In:  r0 (srcin)  = address of the string.
   Out: r0 (result) = number of bytes before the first zero byte.
   Clobbers r1, r2, r3, r12 and the flags (including GE).  r4 and r5 are
   used as well, but are stored on the stack on entry and reloaded
   before returning.

   Method: the start address is rounded down to a multiple of 8 and the
   string is read one doubleword (ldrd) at a time.  For each word, uadd8
   with 0xffffffff sets GE[i] exactly when byte i is non-zero (the
   byte-wise add carries out for every value except 0), and sel then
   builds a mask that holds 0xff in every lane whose byte was zero.
   The second sel folds the first word's mask into the second word's, so
   a single test of data1b tells whether either word contained a zero
   byte.  The aligned loop is unrolled four times (32 bytes per
   iteration).  When the start address is not a multiple of 8, the bytes
   of the first doubleword that lie before the string are forced
   non-zero and the code joins the loop at .Lstart_realigned.

   The code is placed in .text, aligned to 64 bytes (.p2align 6).  */
.text | ||
.p2align 6 | ||
ENTRY(strlen) | ||
/* Preload hint for the start of the string.  */
pld [srcin, #0] | ||
/* Store r4/r5 (const_0/tmp1 and tmp2) at sp - 8 and move sp down.  */
strd r4, r5, [sp, #-8]! | ||
/* Unwind annotations for the 8 bytes just pushed and the two saved
   registers.  The state is recorded here so that .Lmisaligned8, which is
   assembled after the epilogue annotations, can reinstate it.  The cfi_*
   names are not defined in this file (presumably they come from
   sysdep.h).  */
cfi_adjust_cfa_offset (8) | ||
cfi_rel_offset (r4, 0) | ||
cfi_rel_offset (r5, 4) | ||
cfi_remember_state | ||
/* src = srcin rounded down to a multiple of 8.  */
bic src, srcin, #7 | ||
/* const_m1 = 0xffffffff.  */
mvn const_m1, #0 | ||
ands tmp1, srcin, #7 /* tmp1 = srcin & 7; Z set if srcin is 8-byte aligned. */ | ||
pld [src, #32] | ||
/* Start address not a multiple of 8: take the fix-up path.  */
bne.w .Lmisaligned8 | ||
mov const_0, #0 | ||
/* Start at -8: the loop adds 8 before the first doubleword is tested,
   so result is always the offset of the doubleword being examined.  */
mov result, #-8 | ||
.Lloop_aligned: | ||
/* Bytes 0-7. */ | ||
ldrd data1a, data1b, [src] | ||
pld [src, #64] | ||
add result, result, #8 | ||
/* Entry point for the misaligned start: data1a/data1b and result have
   already been prepared by .Lmisaligned8.  */
.Lstart_realigned: | ||
uadd8 data1a, data1a, const_m1 /* GE[i] set iff byte i of data1a != 0. */ | ||
sel data1a, const_0, const_m1 /* 0xff in each zero-byte lane, else 0. */ | ||
uadd8 data1b, data1b, const_m1 | ||
/* Lanes where data1b was non-zero take data1a's mask, zero-byte lanes
   become 0xff: data1b != 0 iff either word contains a zero byte.  */
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ | ||
cbnz data1b, .Lnull_found | ||
|
||
/* Bytes 8-15. */ | ||
ldrd data1a, data1b, [src, #8] | ||
uadd8 data1a, data1a, const_m1 /* GE[i] set iff byte i of data1a != 0. */ | ||
add result, result, #8 | ||
sel data1a, const_0, const_m1 /* 0xff in each zero-byte lane, else 0. */ | ||
uadd8 data1b, data1b, const_m1 | ||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ | ||
cbnz data1b, .Lnull_found | ||
|
||
/* Bytes 16-23. */ | ||
ldrd data1a, data1b, [src, #16] | ||
uadd8 data1a, data1a, const_m1 /* GE[i] set iff byte i of data1a != 0. */ | ||
add result, result, #8 | ||
sel data1a, const_0, const_m1 /* 0xff in each zero-byte lane, else 0. */ | ||
uadd8 data1b, data1b, const_m1 | ||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ | ||
cbnz data1b, .Lnull_found | ||
|
||
/* Bytes 24-31. */ | ||
ldrd data1a, data1b, [src, #24] | ||
/* Advance src by the 32 bytes covered by one pass of the unrolled loop.  */
add src, src, #32 | ||
uadd8 data1a, data1a, const_m1 /* GE[i] set iff byte i of data1a != 0. */ | ||
add result, result, #8 | ||
sel data1a, const_0, const_m1 /* 0xff in each zero-byte lane, else 0. */ | ||
uadd8 data1b, data1b, const_m1 | ||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ | ||
/* No zero byte in this doubleword: go round again.  Otherwise fall
   through to .Lnull_found.  */
cmp data1b, #0 | ||
beq .Lloop_aligned | ||
|
||
/* Here result is the offset of the doubleword that contains the first
   zero byte, data1a is the zero-byte mask of its first word and data1b
   the combined mask.  */
.Lnull_found: | ||
/* If the first word has no zero byte, the hit is in the second word:
   skip 4 bytes and use data1b, which in that case is exactly the second
   word's mask.  */
cmp data1a, #0 | ||
itt eq | ||
addeq result, result, #4 | ||
moveq data1a, data1b | ||
/* clz counts from the most significant bit, so the lowest-addressed byte
   must sit in the most significant lane.  That is already the case when
   __ARMEB__ is defined; otherwise the bytes are reversed first.  */
#ifndef __ARMEB__ | ||
rev data1a, data1a | ||
#endif | ||
/* data1a = number of bits that precede the first 0xff lane.  */
clz data1a, data1a | ||
/* Reload r4/r5 and release the 8 bytes of stack.  */
ldrd r4, r5, [sp], #8 | ||
cfi_adjust_cfa_offset (-8) | ||
cfi_restore (r4) | ||
cfi_restore (r5) | ||
add result, result, data1a, lsr #3 /* Bits -> Bytes. */ | ||
/* DO_RET is not defined in this file; presumably it returns through lr.  */
DO_RET(lr) | ||
|
||
/* Start address is not a multiple of 8.  tmp1 = srcin & 7 is the number
   of bytes in the first doubleword that lie before the string.  */
.Lmisaligned8: | ||
/* Reinstate the unwind state recorded after the prologue; the epilogue
   annotations above changed it, but r4/r5 are still on the stack here.  */
cfi_restore_state | ||
ldrd data1a, data1b, [src] | ||
/* tmp2 = number of leading bytes to hide within one word (0-3).  */
and tmp2, tmp1, #3 | ||
/* result = -tmp1, so that an offset measured from src turns into a
   length measured from srcin.  */
rsb result, tmp1, #0 | ||
lsl tmp2, tmp2, #3 /* Bytes -> bits. */ | ||
/* NE if the string starts in the second word (tmp1 >= 4).  The flags
   are consumed by the itt below; none of the instructions in between
   is a flag-setting form.  */
tst tmp1, #4 | ||
pld [src, #64] | ||
/* tmp2 = 0xffffffff shifted so that the lanes of the (tmp1 & 3)
   lowest-addressed bytes are zero and all other lanes are 0xff.  */
S2HI tmp2, const_m1, tmp2 | ||
/* data1a |= ~tmp2: force the bytes before the string to 0xff so they
   cannot be taken for a terminator.  */
orn data1a, data1a, tmp2 | ||
/* String starts in the second word: apply the mask to data1b instead and
   make the whole first word non-zero.  */
itt ne | ||
ornne data1b, data1b, tmp2 | ||
movne data1a, const_m1 | ||
/* tmp1 is no longer needed, so its register can now hold const_0.  */
mov const_0, #0 | ||
/* Join the main loop after its load and its add of 8.  */
b .Lstart_realigned | ||
|
||
END(strlen) | ||
/* libc_hidden_builtin_def is not defined in this file; presumably it
   creates the library-internal alias for strlen.  */
libc_hidden_builtin_def (strlen) |