Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
This implementation of strlen is faster than the armv6 version for
all string lengths greater than 1 on a Cortex-A15.

ports/ChangeLog.arm:

2013-08-09  Will Newton  <will.newton@linaro.org>

	* sysdeps/arm/armv6t2/strlen.S: New file.
  • Loading branch information
Will Newton committed Aug 30, 2013
1 parent 0186c6e commit 2601bc1
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ports/ChangeLog.arm
@@ -1,3 +1,7 @@
2013-08-30 Will Newton <will.newton@linaro.org>

* sysdeps/arm/armv6t2/strlen.S: New file.

2013-08-29 Thomas Schwinge <thomas@codesourcery.com>

* sysdeps/unix/sysv/linux/arm/ldsodefs.h (VALID_ELF_OSABI)
Expand Down
141 changes: 141 additions & 0 deletions ports/sysdeps/arm/armv6t2/strlen.S
@@ -0,0 +1,141 @@
/* Copyright (C) 2010-2011,2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */

/*
Assumes:
ARMv6T2, AArch32
*/

#include <sysdep.h>

#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif

/* This code requires Thumb. */
.thumb
.syntax unified

/* Parameters and result. */
#define srcin r0
#define result r0

/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5

.text
.p2align 6
ENTRY(strlen)
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
cfi_adjust_cfa_offset (8)
cfi_rel_offset (r4, 0)
cfi_rel_offset (r5, 4)
cfi_remember_state
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found

/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned

.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
cfi_adjust_cfa_offset (-8)
cfi_restore (r4)
cfi_restore (r5)
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
DO_RET(lr)

.Lmisaligned8:
cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned

END(strlen)
libc_hidden_builtin_def (strlen)

0 comments on commit 2601bc1

Please sign in to comment.