Skip to content

Commit

Permalink
sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
Browse files Browse the repository at this point in the history
This implementation of strlen is faster than the armv6 version for
all string lengths greater than 1 on a Cortex-A15.

ports/ChangeLog.arm:

2013-08-09  Will Newton  <will.newton@linaro.org>

	* sysdeps/arm/armv6t2/strlen.S: New file.
  • Loading branch information
Will Newton committed Aug 30, 2013
1 parent 0186c6e commit 2601bc1
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ports/ChangeLog.arm
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2013-08-30 Will Newton <will.newton@linaro.org>

* sysdeps/arm/armv6t2/strlen.S: New file.

2013-08-29 Thomas Schwinge <thomas@codesourcery.com>

* sysdeps/unix/sysv/linux/arm/ldsodefs.h (VALID_ELF_OSABI)
Expand Down
141 changes: 141 additions & 0 deletions ports/sysdeps/arm/armv6t2/strlen.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/* Copyright (C) 2010-2011,2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */

/*
Assumes:
ARMv6T2, AArch32
*/

#include <sysdep.h>

#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif

/* This code requires Thumb. */
.thumb
.syntax unified

/* Parameters and result. */
#define srcin r0
#define result r0

/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5

.text
.p2align 6
ENTRY(strlen)
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
cfi_adjust_cfa_offset (8)
cfi_rel_offset (r4, 0)
cfi_rel_offset (r5, 4)
cfi_remember_state
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned

.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
cfi_adjust_cfa_offset (-8)
cfi_restore (r4)
cfi_restore (r5)
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
DO_RET(lr)

.Lmisaligned8:
cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned

END(strlen)
libc_hidden_builtin_def (strlen)

0 comments on commit 2601bc1

Please sign in to comment.