[PATCH 4/7] add AVR32 optimized string functions

Hans-Christian Egtvedt hcegtvedt at atmel.com
Tue Nov 6 07:50:07 UTC 2007


This patch adds AVR32 optimized string functions. There have been some issues
with the assembler-optimized string functions in the past, but this patch
passes the test/string test cases.

Signed-off-by: Hans-Christian Egtvedt <hcegtvedt at atmel.com>
--- 

diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile
new file mode 100644
index 0000000..e19e9d9
--- /dev/null
+++ b/libc/string/avr32/Makefile
@@ -0,0 +1,26 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2003 Erik Andersen <andersen at uclibc.org>
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+top_srcdir	:= ../../../
+top_builddir	:= ../../../
+
+all: objs
+
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/avr32/bcopy.S b/libc/string/avr32/bcopy.S
new file mode 100644
index 0000000..87c1e04
--- /dev/null
+++ b/libc/string/avr32/bcopy.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+	.text
+	.global bcopy
+	.type	bcopy, @function
+	.align	1
+bcopy:
+	/* Swap the first two arguments (r12 <-> r11) with a three-EOR exchange */
+	eor	r11, r12	/* r11 = r11 ^ r12 */
+	eor	r12, r11	/* r12 = original r11 */
+	eor	r11, r12	/* r11 = original r12 */
+	rjmp	__GI_memmove	/* jump, not call: remaining registers are passed on untouched */
+
+	.size	bcopy, . - bcopy
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/bzero.S b/libc/string/avr32/bzero.S
new file mode 100644
index 0000000..c999e65
--- /dev/null
+++ b/libc/string/avr32/bzero.S
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+/*
+ * void bzero(void *s, size_t n)
+ *
+ * Moves n from r11 to r10, loads 0 into r11 and jumps to __memset, i.e.
+ * the call becomes memset(s, 0, n) with s left in r12.
+ */
+	.text
+	.global bzero
+	.type	bzero, @function
+	.align	1
+bzero:
+	mov	r10, r11	/* length becomes the third argument */
+	mov	r11, 0		/* fill value: zero */
+	rjmp	__memset	/* jump, not call: __memset returns to our caller */
+
+	.size	bzero, . - bzero
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/memcmp.S b/libc/string/avr32/memcmp.S
new file mode 100644
index 0000000..ae6cc91
--- /dev/null
+++ b/libc/string/avr32/memcmp.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+	.text
+	.global memcmp
+	.type	memcmp, @function
+	.align	1
+memcmp:					/* arguments: s1 = r12, s2 = r11, len = r10 */
+	sub	len, 4			/* bias by -4: len >= 0 means a whole word is left */
+	brlt	.Lless_than_4
+.Lword_loop:
+	ld.w	r8, s1++		/* compare four bytes per iteration */
+	ld.w	r9, s2++
+	cp.w	r8, r9
+	brne	.Lfound_word		/* mismatch somewhere inside this word */
+	sub	len, 4
+	brge	.Lword_loop
+
+.Lless_than_4:
+	sub	len, -4			/* drop the bias: len = bytes still to compare */
+	reteq	0			/* nothing left: the buffers are equal */
+.Lbyte_loop:
+	ld.ub	r8, s1++
+	ld.ub	r9, s2++
+	sub	r8, r9			/* difference of the two bytes */
+	retne	r8			/* non-zero difference is the result */
+	sub	len, 1
+	brgt	.Lbyte_loop
+
+	retal	0			/* every remaining byte matched */
+
+.Lfound_word:
+	mov	r10, 4			/* r10 now counts the bytes of the word */
+.Lnext_byte:
+	bfextu	r11, r9, 24, 8		/* top byte of the s2 word (s2 pointer is dead) */
+	bfextu	r12, r8, 24, 8		/* top byte of the s1 word (s1 pointer is dead) */
+	sub	r12, r11
+	retne	r12			/* first differing byte decides the result */
+	lsl	r8, 8			/* bring the next byte to the top */
+	lsl	r9, 8
+	sub	r10, 1
+	brne	.Lnext_byte
+	retal	r12			/* only if all four bytes matched; r12 is 0 here */
+
+	.size	memcmp, . - memcmp
+
+libc_hidden_def(memcmp)
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(memcmp,bcmp)
+#endif
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S
new file mode 100644
index 0000000..bf091ab
--- /dev/null
+++ b/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9
+#define src r11
+#define len r10
+
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:					/* arguments: r12 = dst, r11 = src, r10 = len */
+	pref	src[0]			/* prefetch hint for the start of the source */
+	mov	dst, r12		/* work on a copy (r9); r12 stays the return value */
+
+	/* If we have less than 32 bytes, don't do anything fancy */
+	cp.w	len, 32
+	brge	.Lmore_than_31
+
+	sub	len, 1
+	retlt	r12			/* len was 0: nothing to copy */
+1:	ld.ub	r8, src++		/* plain byte loop for short copies */
+	st.b	dst++, r8
+	sub	len, 1
+	brge	1b
+	retal	r12
+
+.Lmore_than_31:
+	pushm	r0-r7, lr		/* r0-r7 serve as copy buffer; lr is popped into pc on exit */
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH		/* r8 = src & 31 */
+	brne	.Lunaligned_src
+	mov	r8, dst
+	andl	r8, 3, COH		/* r8 = dst & 3 */
+	brne	.Lunaligned_dst
+
+.Laligned_copy:
+	sub	len, 32			/* bias by -32: len >= 0 means a full block is left */
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	ldm	src, r0-r7
+	sub	src, -32		/* src += 32 */
+	stm	dst, r0-r7
+	sub	dst, -32		/* dst += 32 */
+	sub	len, 32
+	brge	1b
+
+.Lless_than_32:
+	/* Copy 16 more bytes if possible */
+	sub	len, -16		/* len = remaining - 16 */
+	brlt	.Lless_than_16
+	ldm	src, r0-r3
+	sub	src, -16		/* src += 16 */
+	sub	len, 16			/* back to "remaining - 16" after this chunk */
+	stm	dst, r0-r3
+	sub	dst, -16		/* dst += 16 */
+
+.Lless_than_16:
+	/* Do the remaining as byte copies */
+	neg	len			/* len = 16 - remaining, i.e. 1..16 */
+	add	pc, pc, len << 2	/* jump over that many ld/st pairs; assumes 4 bytes per pair */
+	.rept	15
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc		/* restore r0-r7 and return */
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) */
+	rsub	r8, r8, 32		/* r8 = 32 - r8: bytes up to the next 32-byte boundary */
+	sub	len, r8			/* those bytes are copied by the loop below */
+1:	ld.ub	r0, src++
+	st.b	dst++, r0
+	sub	r8, 1
+	brne	1b
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[0]
+	mov	r8, 3
+	tst	dst, r8			/* sets flags from dst & 3 */
+	breq	.Laligned_copy
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4			/* bias by -4: len >= 0 means a full word is left */
+	brlt	2f
+1:	ld.w	r0, src++
+	st.w	dst++, r0
+	sub	len, 4
+	brge	1b
+
+2:	neg	len			/* len = 4 - remaining, i.e. 1..4 */
+	add	pc, pc, len << 2	/* jump over that many ld/st pairs; assumes 4 bytes per pair */
+	.rept	3
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc		/* restore r0-r7 and return */
+	.size	memcpy, . - memcpy
+
+libc_hidden_def(memcpy)
diff --git a/libc/string/avr32/memmove.S b/libc/string/avr32/memmove.S
new file mode 100644
index 0000000..98287c5
--- /dev/null
+++ b/libc/string/avr32/memmove.S
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#define dst r12
+#define src r11
+#define len r10
+
+	.text
+	.global memmove
+	.type	memmove, @function
+memmove:				/* arguments: r12 = dst, r11 = src, r10 = len */
+	cp.w	src, dst		/* addresses: the ordering must be taken as unsigned */
+	brhs	__GI_memcpy		/* src >= dst: a forward copy never overwrites unread source */
+
+	add	dst, len		/* src < dst: point both one past the end ... */
+	add	src, len		/* ... and copy from the top downwards */
+	pref	src[-1]			/* prefetch hint for the last source byte */
+
+	/*
+	 * The rest is basically the same as in memcpy.S except that
+	 * the direction is reversed.
+	 */
+	cp.w	len, 32
+	brge	.Lmore_than_31
+
+	sub	len, 1
+	retlt	r12			/* len was 0: dst (r12) has not moved */
+1:	ld.ub	r8, --src		/* plain byte loop for short copies */
+	st.b	--dst, r8
+	sub	len, 1
+	brge	1b
+	retal	r12			/* dst has walked back down to its original value */
+
+.Lmore_than_31:
+	pushm	r0-r7, lr		/* r0-r7 serve as copy buffer; lr is popped into pc on exit */
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH		/* r8 = src & 31 */
+	brne	.Lunaligned_src
+	mov	r8, r12
+	andl	r8, 3, COH		/* r8 = dst & 3 */
+	brne	.Lunaligned_dst
+
+.Laligned_copy:
+	sub	len, 32			/* bias by -32: len >= 0 means a full block is left */
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	sub	src, 32
+	ldm	src, r0-r7
+	sub	dst, 32
+	sub	len, 32
+	stm	dst, r0-r7
+	brge	1b			/* meant to test the flags of "sub len, 32" above */
+
+.Lless_than_32:
+	/* Copy 16 more bytes if possible */
+	sub	len, -16		/* len = remaining - 16 */
+	brlt	.Lless_than_16
+	sub	src, 16
+	ldm	src, r0-r3
+	sub	dst, 16
+	sub	len, 16			/* back to "remaining - 16" after this chunk */
+	stm	dst, r0-r3
+
+.Lless_than_16:
+	/* Do the remaining as byte copies */
+	sub	len, -16		/* drop the bias: len = remaining, 0..15 */
+	breq	2f
+1:	ld.ub	r0, --src
+	st.b	--dst, r0
+	sub	len, 1
+	brne	1b
+
+2:	popm	r0-r7, pc		/* restore r0-r7 and return */
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) */
+	sub	len, r8			/* the r8 bytes copied below come out of len */
+1:	ld.ub	r0, --src
+	st.b	--dst, r0
+	sub	r8, 1
+	brne	1b
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[-4]
+	mov	r8, 3
+	tst	dst, r8			/* sets flags from dst & 3 */
+	breq	.Laligned_copy
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4			/* bias by -4: len >= 0 means a full word is left */
+	brlt	2f
+1:	ld.w	r0, --src
+	st.w	--dst, r0
+	sub	len, 4
+	brge	1b
+
+2:	neg	len			/* len = 4 - remaining, i.e. 1..4 */
+	add	pc, pc, len << 2	/* jump over that many ld/st pairs; assumes 4 bytes per pair */
+	.rept	3
+	ld.ub	r0, --src
+	st.b	--dst, r0
+	.endr
+
+	popm	r0-r7, pc		/* restore r0-r7 and return */
+	.size	memmove, . - memmove
+
+libc_hidden_def(memmove)
diff --git a/libc/string/avr32/memset.S b/libc/string/avr32/memset.S
new file mode 100644
index 0000000..33cfaed
--- /dev/null
+++ b/libc/string/avr32/memset.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s r12
+#define c r11
+#define n r10
+
+	.text
+	.global memset
+	.type	memset, @function
+
+	.global	__memset
+	.hidden	__memset
+	.type	__memset, @function
+
+	.align	1
+memset:					/* arguments: s = r12, c = r11, n = r10 */
+__memset:				/* hidden alias of the same entry point */
+	cp.w	n, 32
+	mov	r9, s			/* keep the original s; brge below still uses the cp.w flags */
+	brge	.Llarge_memset
+
+	sub	n, 1
+	retlt	s			/* n was 0: s has not been advanced */
+1:	st.b	s++, c			/* plain byte loop for short fills */
+	sub	n, 1
+	brge	1b
+
+	retal	r9			/* return the pointer saved on entry */
+
+.Llarge_memset:
+	mov	r8, r11			/* r8 = fill value */
+	mov	r11, 3			/* r11 is reused as the word-alignment mask */
+	bfins	r8, r8, 8, 8		/* copy bits 0-7 into bits 8-15 */
+	bfins	r8, r8, 16, 16		/* copy bits 0-15 into bits 16-31: fill byte in every lane */
+	tst	s, r11
+	breq	2f			/* s is already word-aligned */
+
+1:	st.b	s++, r8			/* byte stores until s is word-aligned */
+	sub	n, 1
+	tst	s, r11
+	brne	1b
+
+2:	mov	r11, r9			/* r11 = original s, returned at the end */
+	mov	r9, r8			/* second register of the pair written by st.d */
+	sub	n, 8			/* bias by -8; n is still >= 29 - 8 here */
+
+3:	st.d	s++, r8			/* eight bytes per iteration */
+	sub	n, 8
+	brge	3b
+
+	/* If we are done, n == -8 and we'll skip all st.b insns below */
+	neg	n			/* n = 8 - remaining, i.e. 1..8 */
+	lsl	n, 1			/* scale to bytes; assumes 2-byte add and st.b encodings */
+	add	pc, n			/* lands so that exactly "remaining" st.b insns execute */
+	.rept	7
+	st.b	s++, r8
+	.endr
+	retal	r11			/* return the original s */
+
+	.size	memset, . - memset
+
+libc_hidden_def(memset)
diff --git a/libc/string/avr32/strcmp.S b/libc/string/avr32/strcmp.S
new file mode 100644
index 0000000..f73bd43
--- /dev/null
+++ b/libc/string/avr32/strcmp.S
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+	.text
+	.global strcmp
+	.type	strcmp, @function
+	.align	1
+strcmp:					/* arguments: s1 = r12, s2 = r11 */
+	mov	r8, 3			/* word-alignment mask */
+	tst	s1, r8
+	brne	.Lunaligned_s1
+	tst	s2, r8
+	brne	.Lunaligned_s2
+.Laligned_loop:
+	ld.w	r8, s1++		/* both pointers word-aligned: four bytes at a time */
+	ld.w	r9, s2++
+	cp.w	r8, r9
+	brne	.Lword_differs
+	tnbz	r8			/* equal words: look for a NUL byte inside */
+	brne	.Laligned_loop		/* none found, keep going */
+	retal	0			/* terminator reached with everything equal */
+.Lword_differs:
+	bfextu	r12, r8, 24, 8		/* byte from bits 31..24 of each word */
+	bfextu	r11, r9, 24, 8
+	sub	r12, r11
+	retne	r12			/* first differing byte decides the result */
+	cp.w	r11, 0
+	reteq	0			/* equal and NUL: strings ended before the difference */
+	bfextu	r12, r8, 16, 8		/* byte from bits 23..16 */
+	bfextu	r11, r9, 16, 8
+	sub	r12, r11
+	retne	r12
+	cp.w	r11, 0
+	reteq	0
+	bfextu	r12, r8, 8, 8		/* byte from bits 15..8 */
+	bfextu	r11, r9, 8, 8
+	sub	r12, r11
+	retne	r12
+	cp.w	r11, 0
+	reteq	0
+	bfextu	r12, r8, 0, 8		/* byte from bits 7..0 */
+	bfextu	r11, r9, 0, 8
+	sub	r12, r11
+	retal	r12			/* last byte: return the difference as it is */
+
+.Lunaligned_s1:
+	tst	s1, r8			/* r8 is still the mask 3 on this path */
+	breq	.Ls1_aligned
+	ld.ub	r10, s1++		/* bytewise until s1 reaches a word boundary */
+	ld.ub	r9, s2++
+	sub	r10, r9
+	retne	r10
+	cp.w	r9, 0
+	brne	.Lunaligned_s1
+	retal	r10			/* both bytes were NUL; r10 is 0 */
+.Ls1_aligned:
+	tst	s2, r8
+	breq	.Laligned_loop		/* s2 became aligned as well: use the word loop */
+
+.Lunaligned_s2:
+	/*
+	 * Here s1 is word-aligned but s2 is not, so the two pointers can
+	 * never be aligned together. Unaligned word loads may fault
+	 * spuriously when they straddle a page boundary: go bytewise.
+	 */
+	ld.ub	r8, s1++
+	ld.ub	r9, s2++
+	sub	r8, r9
+	retne	r8			/* bytes differ: return the difference */
+	cp.w	r9, 0
+	brne	.Lunaligned_s2
+	retal	0			/* NUL reached in both strings */
+
+	.size	strcmp, . - strcmp
+
+libc_hidden_def(strcmp)
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp, strcoll)
+libc_hidden_def(strcoll)
+#endif
diff --git a/libc/string/avr32/strlen.S b/libc/string/avr32/strlen.S
new file mode 100644
index 0000000..5223e53
--- /dev/null
+++ b/libc/string/avr32/strlen.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define str r12
+
+	.text
+	.global strlen
+	.type	strlen, @function
+strlen:					/* argument: str = r12 */
+	mov	r11, r12		/* r11 = start of the string */
+
+	mov	r9, str
+	andl	r9, 3, COH		/* r9 = str & 3 */
+	brne	.Lunaligned_str
+
+1:	ld.w	r8, str++		/* aligned scan, four bytes per iteration */
+	tnbz	r8			/* looks for a NUL byte inside the word */
+	brne	1b			/* none found, keep scanning */
+
+	sub	r12, r11		/* bytes consumed, including the word holding the NUL */
+	bfextu	r9, r8, 24, 8		/* byte from bits 31..24 */
+	cp.w	r9, 0
+	subeq	r12, 4			/* NUL is that byte: length = consumed - 4 */
+	reteq	r12
+	bfextu	r9, r8, 16, 8		/* byte from bits 23..16 */
+	cp.w	r9, 0
+	subeq	r12, 3
+	reteq	r12
+	bfextu	r9, r8, 8, 8		/* byte from bits 15..8 */
+	cp.w	r9, 0
+	subeq	r12, 2
+	reteq	r12
+	sub	r12, 1			/* otherwise the NUL is the remaining (lowest) byte */
+	retal	r12
+
+.Lunaligned_str:
+	add	pc, pc, r9 << 3		/* enter the byte checks 8 * r9 bytes ahead; assumes 8-byte groups */
+	sub	r0, r0, 0	/* 4-byte nop */
+	ld.ub	r8, str++		/* reached only when r9 == 1: three bytes to the boundary */
+	sub	r8, r8, 0		/* sets flags from r8; presumably this form is used for its size */
+	breq	1f
+	ld.ub	r8, str++		/* entry point for r9 == 2 */
+	sub	r8, r8, 0
+	breq	1f
+	ld.ub	r8, str++		/* entry point for r9 == 3 */
+	sub	r8, r8, 0
+	brne	1b			/* str is word-aligned now: continue with the word loop */
+
+1:	sub	r12, 1			/* str was advanced past the NUL: step back onto it */
+	sub	r12, r11		/* length = address of the NUL - start */
+	retal	r12
+
+	.size	strlen, . - strlen
+
+libc_hidden_def(strlen)


More information about the uClibc mailing list