[PATCH] Improved strlen for ARM, around 29% faster
Gabriel Gonzalez
gabriel.gonzalez.garcia at gmail.com
Sat Sep 29 00:48:48 UTC 2012
This version for ARM improves performance mainly by unrolling the loop (four words per iteration)
and by reducing the number of instructions needed to look for the null character.
A deeper analysis of this can be found at http://www.gabrielgonzalezgarcia.com/2012/10/02/mystrlen-vs-android-bionics-strlen-on-arm-cpu/
where you can find some data which back up the performance improvement.
I have only tested it on a little-endian CPU, so the big-endian code path still needs testing.
Signed-off-by: Gabriel Gonzalez <gabriel.gonzalez.garcia at gmail.com>
---
libc/string/arm/strlen.S | 123 +++++++++++++++++++++++++++-------------------
1 file changed, 72 insertions(+), 51 deletions(-)
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 949e918..b87fbb4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -46,63 +46,84 @@ strlen:
bx lr
#else
strlen:
- bic r1, r0, $3 @ addr of word containing first byte
- ldr r2, [r1], $4 @ get the first word
- ands r3, r0, $3 @ how many bytes are duff?
- rsb r0, r3, $0 @ get - that number into counter.
- beq Laligned @ skip into main check routine if no
- @ more
-#if __BYTE_ORDER == __BIG_ENDIAN
- orr r2, r2, $0xff000000 @ set this byte to non-zero
- subs r3, r3, $1 @ any more to do?
- IT(t, gt)
- orrgt r2, r2, $0x00ff0000 @ if so, set this byte
- subs r3, r3, $1 @ more?
- IT(t, gt)
- orrgt r2, r2, $0x0000ff00 @ then set.
-#else
- orr r2, r2, $0x000000ff @ set this byte to non-zero
- subs r3, r3, $1 @ any more to do?
- IT(t, gt)
- orrgt r2, r2, $0x0000ff00 @ if so, set this byte
- subs r3, r3, $1 @ more?
- IT(t, gt)
- orrgt r2, r2, $0x00ff0000 @ then set.
-#endif
-Laligned: @ here, we have a word in r2. Does it
- tst r2, $0x000000ff @ contain any zeroes?
- IT(tttt, ne)
- tstne r2, $0x0000ff00 @
- tstne r2, $0x00ff0000 @
- tstne r2, $0xff000000 @
- addne r0, r0, $4 @ if not, the string is 4 bytes longer
- IT(t, ne)
- ldrne r2, [r1], $4 @ and we continue to the next word
- bne Laligned @
-Llastword: @ drop through to here once we find a
+ stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, lr} @ v1-v7 are callee-saved
+
+ mov v7, a1 @ v7 = scan pointer; a1 keeps the start of the string
+ ldr v6, =0x80808080 @ v6 = top bit of each byte (LSR #7 gives 0x01010101)
+
+ @ Byte loop: scan one byte at a time until v7 is 16-byte aligned,
+ @ so that the 16-byte loads below never straddle a page boundary.
+1: tst v7, #15
+ beq 0f
+ ldrb v1, [v7], #1
+ cmp v1, #0
+ bne 1b
+ sub v7, v7, #1 @ NUL found: step back onto it
+ b 5f
+
+ @ Unrolled loop: load four words per iteration. For each word w,
+ @ (w - 0x01010101) & 0x80808080 & ~w is non-zero iff w contains
+ @ a zero byte.
+0: ldmfd v7!, {v1, v2, v3, v4}
+ sub v5, v1, v6, LSR #7
+ and v5, v5, v6
+ bics v5, v5, v1
+ bne 1f
+ sub v5, v2, v6, LSR #7
+ and v5, v5, v6
+ bics v5, v5, v2
+ bne 2f
+ sub v5, v3, v6, LSR #7
+ and v5, v5, v6
+ bics v5, v5, v3
+ bne 3f
+ sub v5, v4, v6, LSR #7
+ and v5, v5, v6
+ bics v5, v5, v4
+ bne 4f
+ b 0b
+
+ @ Exits: copy the word holding the NUL into v1 and rewind v7
+ @ (now 16 bytes past the loaded block) to the address of that word.
+4: mov v1, v4
+ sub v7, v7, #4
+ b 0f
+3: mov v1, v3
+ sub v7, v7, #8
+ b 0f
+2: mov v1, v2
+ sub v7, v7, #12
+ b 0f
+1: sub v7, v7, #16
+ @ Tail: advance v7 over the non-zero bytes that precede the first
+ @ NUL in v1 (memory order). IT() keeps this valid for Thumb-2.
#if __BYTE_ORDER == __BIG_ENDIAN
- tst r2, $0xff000000 @ word that has a zero byte in it
- IT(tttt, ne)
- addne r0, r0, $1 @
- tstne r2, $0x00ff0000 @ and add up to 3 bytes on to it
- addne r0, r0, $1 @
- tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th
- IT(t, ne)
- addne r0, r0, $1 @ must be zero)
+0: tst v1, #0xFF000000 @ first byte in memory is the top byte
+ IT(tttt, ne)
+ addne v7, v7, #1
+ tstne v1, #0x00FF0000
+ addne v7, v7, #1
+ tstne v1, #0x0000FF00
+ IT(t, ne)
+ addne v7, v7, #1 @ first three non-zero: NUL is the 4th byte
#else
- tst r2, $0x000000ff @
- IT(tttt, ne)
- addne r0, r0, $1 @
- tstne r2, $0x0000ff00 @ and add up to 3 bytes on to it
- addne r0, r0, $1 @
- tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th
- IT(t, ne)
- addne r0, r0, $1 @ must be zero)
+0: tst v1, #0xFF @ first byte in memory is the low byte
+ IT(tttt, ne)
+ addne v7, v7, #1
+ tstne v1, #0xFF00
+ addne v7, v7, #1
+ tstne v1, #0xFF0000
+ IT(t, ne)
+ addne v7, v7, #1 @ first three non-zero: NUL is the 4th byte
#endif
+
+5: sub a1, v7, a1 @ length = address of the NUL - start of the string
+
#if defined(__USE_BX__)
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, lr}
bx lr
#else
- mov pc,lr
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, pc}
#endif
#endif
--
1.7.9.5
More information about the uClibc
mailing list