svn commit: trunk/uClibc/libc/string: bfin

vapier at uclibc.org vapier at uclibc.org
Tue May 30 09:14:13 UTC 2006


Author: vapier
Date: 2006-05-30 02:13:53 -0700 (Tue, 30 May 2006)
New Revision: 15233

Log:
import some optimized functions from blackfin cvs

Added:
   trunk/uClibc/libc/string/bfin/
   trunk/uClibc/libc/string/bfin/Makefile
   trunk/uClibc/libc/string/bfin/memchr.S
   trunk/uClibc/libc/string/bfin/memcmp.S
   trunk/uClibc/libc/string/bfin/memcpy.S
   trunk/uClibc/libc/string/bfin/memmove.S
   trunk/uClibc/libc/string/bfin/memset.S
   trunk/uClibc/libc/string/bfin/strcmp.S


Changeset:

Property changes on: trunk/uClibc/libc/string/bfin
___________________________________________________________________
Name: svn:ignore
   + *.os


Added: trunk/uClibc/libc/string/bfin/Makefile
===================================================================
--- trunk/uClibc/libc/string/bfin/Makefile	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/Makefile	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,13 @@
+# Makefile for uClibc (libc/string/bfin: Blackfin string routines)
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen at uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules

Added: trunk/uClibc/libc/string/bfin/memchr.S
===================================================================
--- trunk/uClibc/libc/string/bfin/memchr.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/memchr.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,54 @@
+/* memchr.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memchr(const void *s, int c, size_t n);
+ * R0 = address (s)
+ * R1 = sought byte (c)
+ * R2 = count (n)
+ *
+ * Returns pointer to the first matching byte, or 0 if none is found in n bytes.
+ */
+
+.text
+
+.align 2
+
+.global _memchr
+.type _memchr, STT_FUNC
+_memchr:
+	P0 = R0;             // P0 = address
+	P2 = R2;             // P2 = count
+	R1 = R1.B(Z);        // compare only the low byte of c, zero-extended
+	CC = R2 == 0;
+	IF CC JUMP failed;   // n == 0: nothing to search
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+
+byte_loop_s:
+	R3 = B[P0++](Z);     // fetch next byte, post-incrementing the pointer
+	CC = R3 == R1;
+	IF CC JUMP found;
+byte_loop_e:
+	NOP;                 // last insn of the loop; keeps "failed" outside the hardware loop
+
+failed:
+	R0=0;
+	RTS;
+
+found:
+	R0 = P0;
+	R0 += -1;            // P0 was post-incremented past the match
+	RTS;
+
+.size _memchr,.-_memchr
+
+libc_hidden_def (memchr)

Added: trunk/uClibc/libc/string/bfin/memcmp.S
===================================================================
--- trunk/uClibc/libc/string/bfin/memcmp.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/memcmp.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,101 @@
+/* memcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* int memcmp(const void *s1, const void *s2, size_t n);
+ * R0 = First Address (s1)
+ * R1 = Second Address (s2)
+ * R2 = count (n)
+ * Returns 0 if equal, else (byte of s1 - byte of s2) at the first mismatch.
+ * Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memcmp
+.type _memcmp, STT_FUNC
+_memcmp:
+	I1 = P3;              // keep caller's P3 in I1; restored before each RTS
+	P0 = R0;              // P0 = s1 address
+	P3 = R1;              // P3 = s2 Address
+	P2 = R2 ;             // P2 = count
+	CC = R2 <= 7(IU);     // short compares go straight to the byte loop
+	IF CC JUMP  too_small;
+	I0 = R1;		    // s2
+	R1 = R1 | R0;         // OR addresses together
+	R1 <<= 30;            // check bottom two bits
+	CC =  AZ;             // AZ set if zero.
+	IF !CC JUMP  bytes ;  // Jump if addrs not aligned.
+
+	P1 = P2 >> 2;          // count = n/4
+	R3 =  3;
+	R2 = R2 & R3;         // remainder
+	P2 = R2;               // set remainder
+
+	LSETUP (quad_loop_s , quad_loop_e) LC0=P1;
+quad_loop_s:
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	MNOP || R0 = [P0++] || R1 = [I0++];   // load one word from each buffer
+#else
+	R0 = [P0++];
+	R1 = [I0++];
+#endif
+	CC = R0 == R1;
+	IF !CC JUMP quad_different;
+quad_loop_e:
+	NOP;
+
+	P3 = I0;                 // s2
+too_small:
+	CC = P2 == 0;            //Check zero count
+	IF CC JUMP finished;     // very unlikely
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+byte_loop_s:
+	R1 = B[P3++](Z);	// *s2
+	R0 = B[P0++](Z);	// *s1
+	CC = R0 == R1;
+	IF !CC JUMP different;
+byte_loop_e:
+	NOP;
+
+different:
+	R0 = R0 - R1;         // also reached by fall-through when every byte matched (result 0)
+	P3 = I1;              // restore caller's P3
+	RTS;
+
+quad_different:
+	// We've read two quads which don't match.
+	// Can't just compare them, because we're
+	// a little-endian machine, so the MSBs of
+	// the regs occur at later addresses in the
+	// string.
+	// Arrange to re-read those two quads again,
+	// byte-by-byte.
+	P0 += -4;	// back up to the start of the
+	P3 = I0;	// quads, and increase the
+	P2 += 4;	// remainder count
+	P3 += -4;
+	JUMP bytes;
+
+finished:
+	R0 = 0;
+	P3 = I1;              // restore caller's P3
+	RTS;
+.size _memcmp,.-_memcmp
+
+libc_hidden_def (memcmp)
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias (memcmp,bcmp)
+#endif

Added: trunk/uClibc/libc/string/bfin/memcpy.S
===================================================================
--- trunk/uClibc/libc/string/bfin/memcpy.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/memcpy.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,74 @@
+/* memcpy.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memcpy(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count
+ *
+ * Note: Favours word alignment
+ */
+
+.text
+
+.align 2
+
+.global _memcpy
+.type _memcpy, STT_FUNC
+_memcpy:
+	[--SP] = P3;          // save caller's P3 on the stack
+	P0 = R0;              // P0 = To address
+	P3 = R1;              // P3 = From Address
+	P2 = R2 ;             // P2 = count
+	CC = R2 <= 7(IU);     // short copies go straight to the byte loop
+	IF CC JUMP  too_small;
+	I0 = R1;              // I0 = From address for the word loop
+	R3 = R1 | R0;         // OR addresses together
+	R3 <<= 30;            // check bottom two bits
+	CC =  AZ;             // AZ set if zero.
+	IF !CC JUMP  bytes ;  // Jump if addrs not aligned.
+	P1 = P2 >> 2;         // count = n/4
+	P1 += -1;             // loop runs n/4-1 times; the last word is stored after it
+	R3 =  3;
+	R2 = R2 & R3;         // remainder
+	P2 = R2;              // set remainder
+	R1 = [I0++];          // preload the first source word
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:		MNOP || [P0++] = R1 || R1 = [I0++];   // store previous word, load next
+#else
+	LSETUP (quad_loop_s , quad_loop_e) LC0=P1;
+quad_loop_s:	[P0++] = R1;
+quad_loop_e:	R1 = [I0++];
+#endif
+	[P0++] = R1;          // store the final loaded word
+
+	CC = P2 == 0;         // any remaining bytes?
+	P3 = I0;	      // Amend P3 for remaining copy
+	IF !CC JUMP bytes;
+	P3 = [SP++];          // restore caller's P3
+	RTS;
+
+too_small:
+	CC = P2 == 0;           //Check zero count
+	IF CC JUMP finished;    // very unlikely
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+byte_loop_s:	R1 = B[P3++](Z);
+byte_loop_e:	B[P0++] = R1;
+
+finished:
+	P3 = [SP++];          // restore caller's P3
+	RTS;
+.size _memcpy,.-_memcpy
+
+libc_hidden_def (memcpy)

Added: trunk/uClibc/libc/string/bfin/memmove.S
===================================================================
--- trunk/uClibc/libc/string/bfin/memmove.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/memmove.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,95 @@
+/* memmove.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memmove(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count (n)
+ *
+ * Note: Data may overlap
+ */
+
+.text
+
+.align 2
+
+.global _memmove
+.type _memmove, STT_FUNC
+_memmove:
+	I1 = P3;              // keep caller's P3 in I1; restored before each RTS
+	P0 = R0;              // P0 = To address
+	P3 = R1;              // P3 = From Address
+	P2 = R2 ;             // P2 = count
+	CC = P2 == 0;           //Check zero count
+	IF CC JUMP finished;    // very unlikely
+
+	CC = R1 < R0 (IU);	// From < To
+	IF !CC JUMP no_overlap;
+	R3 = R1 + R2;
+	CC = R0 <= R3 (IU);	// (From+len) >= To
+	IF CC JUMP overlap;     // dest starts inside/just after src: copy backwards
+no_overlap:
+	R3 = 11;
+	CC = R2 <= R3;        // NOTE(review): no (IU) here, unlike the other count checks - confirm
+	IF CC JUMP  bytes;
+	R3 = R1 | R0;         // OR addresses together
+	R3 <<= 30;            // check bottom two bits
+	CC =  AZ;             // AZ set if zero.
+	IF !CC JUMP  bytes ;  // Jump if addrs not aligned.
+
+	I0 = P3;
+	P1 = P2 >> 2;         // count = n/4
+	P1 += -1;             // loop runs n/4-1 times; the last word is stored after it
+	R3 =  3;
+	R2 = R2 & R3;         // remainder
+	P2 = R2;              // set remainder
+	R1 = [I0++];          // preload the first source word
+
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:		MNOP || [P0++] = R1 || R1 = [I0++];   // store previous word, load next
+#else
+	LSETUP (quad_loop_s, quad_loop_e) LC0=P1;
+quad_loop_s:	[P0++] = R1;
+quad_loop_e:	R1 = [I0++];
+#endif
+	[P0++] = R1;          // store the final loaded word
+
+	CC = P2 == 0;         // any remaining bytes?
+	P3 = I0;		  // Amend P3 to updated ptr.
+	IF !CC JUMP bytes;
+	P3 = I1;              // restore caller's P3
+	RTS;
+
+bytes:		LSETUP (byte2_s , byte2_e) LC0=P2;
+byte2_s:	R1 = B[P3++](Z);
+byte2_e:	B[P0++] = R1;
+
+finished:
+	P3 = I1;              // restore caller's P3
+	RTS;
+
+overlap:
+	P2 += -1;             // P2 = n-1: offset of the last byte
+	P0 = P0 + P2;         // point both pointers at their last byte
+	P3 = P3 + P2;
+	R1 = B[P3--] (Z);     // preload the last source byte
+	CC = P2 == 0;
+	IF CC JUMP no_loop;   // n == 1: only the preloaded byte to store
+	LSETUP (ol_s, ol_e) LC0 = P2;
+ol_s:		B[P0--] = R1;
+ol_e:		R1 = B[P3--] (Z);
+no_loop:	B[P0] = R1;
+	P3 = I1;              // restore caller's P3
+	RTS;
+.size _memmove,.-_memmove
+
+libc_hidden_def (memmove)

Added: trunk/uClibc/libc/string/bfin/memset.S
===================================================================
--- trunk/uClibc/libc/string/bfin/memset.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/memset.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,86 @@
+/* memset.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memset(void *s, int c, size_t n);
+ * R0 = address (s) (leave unchanged to form result)
+ * R1 = filler byte (c)
+ * R2 = count (n)
+ *
+ * Note: Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memset
+.type _memset, STT_FUNC
+_memset:
+	P0 = R0 ;             // P0 = address
+	P2 = R2 ;             // P2 = count
+	R3 = R0 + R2;         // end
+	CC = R2 <= 7(IU);     // short fills go straight to the byte loop
+	IF CC JUMP  too_small;
+	R1 = R1.B (Z);        // R1 = fill char
+	R2 =  3;
+	R2 = R0 & R2;         // addr bottom two bits
+	CC =  R2 == 0;             // CC set if the bottom two bits are zero.
+	IF !CC JUMP  force_align ;  // Jump if addr not aligned.
+
+aligned:
+	P1 = P2 >> 2;          // count = n/4
+	R2 = R1 <<  8;         // create quad filler
+	R2.L = R2.L + R1.L(NS);
+	R2.H = R2.L + R1.H(NS);
+	P2 = R3;               // P2 = end address
+
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:
+	[P0++] = R2;
+
+	CC = P0 == P2;         // reached the end exactly?
+	IF !CC JUMP bytes_left;
+	RTS;
+
+bytes_left:
+	R2 = R3;         // end point
+	R3 = P0;         // current position
+	R2 = R2 - R3;    // bytes left
+	P2 = R2;
+
+too_small:
+	CC = P2 == 0;           //Check zero count
+	IF CC JUMP finished;    // Unusual
+
+bytes:       LSETUP (byte_loop , byte_loop) LC0=P2;
+byte_loop:   B[P0++] = R1;
+
+finished:
+	RTS;
+
+force_align:
+	CC = BITTST (R0, 0 );  // odd byte
+	R0 = 4;
+	R0 = R0 - R2;          // bytes needed to reach the next word boundary
+	P1 = R0;
+	R0 = P0;			// Recover return value (original s)
+	IF !CC JUMP skip1;
+	B[P0++] = R1;          // odd address: store one byte
+skip1:
+	CC = R2 <= 2;          // 2 bytes
+	P2 -= P1;              // reduce count
+	IF !CC JUMP aligned;
+	B[P0++] = R1;
+	B[P0++] = R1;
+	JUMP aligned;
+.size _memset,.-_memset
+
+libc_hidden_def (memset)

Added: trunk/uClibc/libc/string/bfin/strcmp.S
===================================================================
--- trunk/uClibc/libc/string/bfin/strcmp.S	2006-05-30 08:25:59 UTC (rev 15232)
+++ trunk/uClibc/libc/string/bfin/strcmp.S	2006-05-30 09:13:53 UTC (rev 15233)
@@ -0,0 +1,121 @@
+/* strcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* Fast strcmp() for Blackfin.
+ * When both strings are aligned, this processes four characters at
+ * a time. Uses a hw loop with "very big" count to loop "forever",
+ * until difference or a terminating zero is found.
+ * Once the end-case word has been identified, breaks out of the
+ * loop to check more carefully (same as the unaligned case).
+ */
+
+.text
+
+.align 2
+
+.global _strcmp
+.type _strcmp, STT_FUNC
+_strcmp:
+	[--sp] = (R7:4);	// save R7-R4; restored at exitloop1
+	p1 = r0;
+	p2 = r1;
+
+	p0 = -1;	// (need for loop counter init)
+
+	  // check if both pointers are word (4-byte) aligned
+	r0 = r0 | r1;	// check both pointers at same time
+	r0 <<= 30;	// dump all but last 2 bits
+	cc = az;	// are they zero?
+	if !cc jump unaligned;	// no; use unaligned code.
+			// fall-thru for aligned case..
+
+	  // note that r0 is zero from the previous...
+	  //           p0 set to -1
+
+	lsetup (beginloop, endloop) lc0=p0;
+	  // pick up first words
+	r1 = [p1++];
+	r2 = [p2++];
+	  // make up mask:  0x00FF00FF
+	r7 = 0xFF;
+	r7.h = 0xFF;
+		// loop : 9 cycles to check 4 characters
+	cc = r1 == r2;
+beginloop:
+	if !cc jump notequal4;	// compare failure, exit loop
+
+	  // starting with   44332211
+	  // see if char 3 or char 1 is 0
+	r3 = r1 & r7;		// form 00330011
+	  // add to zero, and (r2 is free, reload)
+	r6 = r3 +|+ r0 || r2 = [p2++] || nop;
+	cc = az;	// true if either is zero
+	r3 = r1 ^ r3;	        // form 44002200 (4321^0301 => 4020)
+				// (trick, saves having another mask)
+	// add to zero,  and  (r1 is free, reload)
+	r6 = r3 +|+ r0 || r1 = [p1++] || nop;
+	cc |= az;	// true if either is zero
+	if cc jump zero4;	// leave if a zero somewhere
+endloop:
+	cc = r1 == r2;
+
+ // loop exits
+notequal4:		// compare failure on 4-char compare
+			// address pointers are one word ahead;
+			// faster to use zero4 exit code
+	p1 += 4;
+	p2 += 4;
+
+zero4:			// one of the bytes in word 1 is zero
+			// but we've already fetched the next word; so
+			// backup two to look at failing word again
+	p1 += -8;
+	p2 += -8;
+
+
+
+		// here when pointers are unaligned: checks one
+		// character at a time.  Also use at the end of
+		// the word-check algorithm to figure out what happened
+unaligned:
+	  //	R0 is non-zero when jumped to from the alignment test, so clear it.
+	  //           p0 set to -1
+
+	r0 = 0 (Z);
+	r1 = B[p1++] (Z);
+	r2 = B[p2++] (Z);
+	lsetup (beginloop1, endloop1) lc0=p0;
+
+beginloop1:
+	cc = r1;	// first char must be non-zero
+	// chars must be the same
+	r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
+	cc &= az;
+	r3 = r0 - r2;	// second char must be non-zero
+	cc &= an;
+	if !cc jump exitloop1;
+endloop1:
+	r2 = B[p2++] (Z);
+
+exitloop1: // here means we found a zero or a difference.
+	   // we have r2(N), p2(N), r1(N+1), p1(N+2)
+	r1=B[p1+ -2] (Z);	// re-read char N of the first string
+	r0 = r1 - r2;		// result = s1[N] - s2[N]
+	(r7:4) = [sp++];	// restore R7-R4
+	rts;
+.size _strcmp,.-_strcmp
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp,strcoll)
+libc_hidden_def (strcoll)
+#endif




More information about the uClibc-cvs mailing list