svn commit: trunk/uClibc: include libc/string libc/string/generic etc...

vda at uclibc.org vda at uclibc.org
Wed Dec 17 01:36:32 UTC 2008


Author: vda
Date: 2008-12-16 17:36:31 -0800 (Tue, 16 Dec 2008)
New Revision: 24435

Log:
since gcc -Os hates us and does not inline string ops,
implement inline versions of some of them.
Enable only those which result roughly in the same
code size as using out-of-line versions.

None of this affects users, installed headers won't have
any trace of it.



Added:
   trunk/uClibc/include/libc-string_i386.h

Modified:
   trunk/uClibc/include/string.h
   trunk/uClibc/libc/string/generic/memchr.c
   trunk/uClibc/libc/string/generic/mempcpy.c
   trunk/uClibc/libc/string/i386/memcpy.c
   trunk/uClibc/libc/string/i386/memset.c
   trunk/uClibc/libc/string/i386/strcpy.c
   trunk/uClibc/libc/string/i386/strlen.c
   trunk/uClibc/libc/string/memchr.c
   trunk/uClibc/libc/string/mempcpy.c
   trunk/uClibc/libc/string/stpcpy.c


Changeset:
Added: trunk/uClibc/include/libc-string_i386.h
===================================================================
--- trunk/uClibc/include/libc-string_i386.h	                        (rev 0)
+++ trunk/uClibc/include/libc-string_i386.h	2008-12-17 01:36:31 UTC (rev 24435)
@@ -0,0 +1,314 @@
+/*
+ * Copyright (C) 2008 Denys Vlasenko <vda.linux at googlemail.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball
+ */
+
+#if !defined _STRING_H
+#error "Never use <libc-string_i386.h> directly; include <string.h> instead"
+#endif
+
+#ifndef _LIBC_STRING_i386_H
+#define _LIBC_STRING_i386_H 1
+
+static __always_inline /* memset() for compile-time-constant c and count; returns s */
+void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count)
+{
+	int ecx, edi; /* dummy outputs for the registers the string insns modify */
+
+	if (count == 0)
+		return s;
+
+	/* Very small (2 stores or less) are best done with direct
+	 * mov <const>,<mem> instructions (they do not clobber registers) */
+	if (count == 1) {
+		*(char *)(s + 0) = eax;
+		return s;
+	}
+	/* Mask to one byte first: memset(p, -1, n) or a signed '\xff' arrives as 0xffffffff */
+	eax = (eax & 0xff) * 0x01010101; /* done at compile time */
+
+	if (count == 2) {
+		*(short *)(s + 0) = eax;
+		return s;
+	}
+	if (count == 3) {
+		*(short *)(s + 0) = eax;
+		*(char *) (s + 2) = eax;
+		return s;
+	}
+	if (count == 1*4 + 0) {
+		*(int *)(s + 0) = eax;
+		return s;
+	}
+	if (count == 1*4 + 1) {
+		*(int *) (s + 0) = eax;
+		*(char *)(s + 4) = eax;
+		return s;
+	}
+	if (count == 1*4 + 2) {
+		*(int *)  (s + 0) = eax;
+		*(short *)(s + 4) = eax;
+		return s;
+	}
+
+	/* Small string stores: don't clobber ecx
+	 * (clobbers only eax and edi) */
+#define small_store(arg) { \
+	__asm__ __volatile__( \
+		arg \
+		: "=&D" (edi) \
+		: "a" (eax), "0" (s) \
+		: "memory" \
+	); \
+	return s; \
+}
+	if (count == 1*4 + 3) small_store("stosl; stosw; stosb");
+	if (count == 2*4 + 0) {
+		((int *)s)[0] = eax;
+		((int *)s)[1] = eax;
+		return s;
+	}
+	if (count == 2*4 + 1) small_store("stosl; stosl; stosb");
+	if (count == 2*4 + 2) small_store("stosl; stosl; stosw");
+	if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb");
+	if (count == 3*4 + 0) small_store("stosl; stosl; stosl");
+	if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb");
+	if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw");
+	if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb");
+	if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl");
+	if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb");
+	/* going over 7 bytes is suboptimal */
+	/* stosw is 2-byte insn, so this one takes 6 bytes: */
+	if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw");
+	/* 7 bytes */
+	if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; stosb");
+	/* 5 bytes */
+	if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl");
+	/* 6 bytes */
+	if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; stosb");
+	/* 7 bytes */
+	if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; stosw");
+	/* 8 bytes, but oh well... */
+	if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; stosw; stosb");
+	/* 6 bytes */
+	if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; stosl");
+	/* the rest would be 7+ bytes and is handled below instead */
+#undef small_store
+
+	/* Not small, but multiple-of-4 store (only count / 4 dwords are written,
+	 * so count % 4 must be 0). "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */
+	__asm__ __volatile__(
+		"	rep; stosl\n"
+		: "=&c" (ecx), "=&D" (edi)
+		: "a" (eax), "0" (count / 4), "1" (s)
+		: "memory"
+	);
+	return s;
+}
+#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */
+#define memset(s, c, count) ( \
+	( !(__builtin_constant_p(c) && __builtin_constant_p(count)) /* c or count not constant, or ... */ \
+	  || ((count) > (6*4 + 0) && ((count) % 4) != 0) /* ... more than 24 bytes and not a multiple of 4 */ \
+	) \
+	? memset((s), (c), (count)) /* call the out-of-line function */ \
+	: inlined_memset_const_c_count4((s), (c), (count)) \
+	)
+#endif
+
+
+static __always_inline /* mempcpy() for constant count; returns the byte after the last one written */
+void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count)
+{
+	int ecx; /* dummy output for %ecx (the rep count) */
+	char *esi, *edi; /* values of %esi/%edi after the move insns */
+
+	if (count == 0)
+		return d;
+
+	if (count == 1) {
+		*(char *)d = *(char *)s;
+		return d + 1;
+	}
+	if (count == 2) {
+		*(short *)d = *(short *)s;
+		return d + 2;
+	}
+	/* Small string moves: don't clobber ecx
+	 * (clobbers only esi and edi) */
+#define small_move(arg) { \
+	__asm__ __volatile__( \
+		arg \
+		: "=&S" (esi), "=&D" (edi) \
+		: "0" (s), "1" (d) \
+		: "memory" \
+	); \
+	return edi; \
+}
+	if (count == 3) small_move("movsw; movsb");
+	if (count == 1*4 + 0) {
+		*(int *)d = *(int *)s;
+		return d + 4;
+	}
+	if (count == 1*4 + 1) small_move("movsl; movsb");
+	if (count == 1*4 + 2) small_move("movsl; movsw");
+	if (count == 1*4 + 3) small_move("movsl; movsw; movsb");
+	if (count == 2*4 + 0) small_move("movsl; movsl");
+	if (count == 2*4 + 1) small_move("movsl; movsl; movsb");
+	if (count == 2*4 + 2) small_move("movsl; movsl; movsw");
+	if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb");
+	if (count == 3*4 + 0) small_move("movsl; movsl; movsl");
+	if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb");
+	if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw");
+	if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb");
+	if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl");
+	if (count == 4*4 + 1) small_move("movsl; movsl; movsl; movsl; movsb");
+	/* going over 7 bytes is suboptimal */
+	/* movsw is 2-byte insn, so this one takes 6 bytes: */
+	if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw");
+	/* 7 bytes */
+	if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; movsb");
+	/* 5 bytes */
+	if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl");
+	/* 6 bytes */
+	if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; movsb");
+	/* 7 bytes */
+	if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; movsw");
+	/* 8 bytes, but oh well... */
+	if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; movsw; movsb");
+	/* 6 bytes */
+	if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; movsl");
+	/* the rest would be 7+ bytes and is handled below instead */
+#undef small_move
+
+	/* Not small, but multiple-of-4 move (only count / 4 dwords are copied).
+	 * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */
+	__asm__ __volatile__(
+		"	rep; movsl\n"
+		: "=&c" (ecx), "=&S" (esi), "=&D" (edi)
+		: "0" (count / 4), "1" (s), "2" (d)
+		: "memory"
+	);
+	return edi;
+}
+static __always_inline
+void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count)
+{
+	/* Same copy as the mempcpy variant; memcpy returns the start of d instead. */
+	return ((void)inlined_mempcpy_const_count4(d, s, count), d);
+}
+#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */
+#define mempcpy(d, s, count) ( \
+	( !(__builtin_constant_p(count)) /* count not constant, or ... */ \
+	  || ((count) > (6*4 + 0) && ((count) % 4) != 0) /* ... more than 24 bytes and not a multiple of 4 */ \
+	) \
+	? mempcpy((d), (s), (count)) /* call the out-of-line function */ \
+	: inlined_mempcpy_const_count4((d), (s), (count)) \
+	)
+#define memcpy(d, s, count) ( \
+	( !(__builtin_constant_p(count)) /* same selection rule as mempcpy above */ \
+	  || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+	) \
+	? memcpy((d), (s), (count)) /* call the out-of-line function */ \
+	: inlined_memcpy_const_count4((d), (s), (count)) \
+	)
+#endif
+
+
+static __always_inline /* strlen() via "repne scasb"; returns the number of bytes before the NUL */
+size_t inlined_strlen(const char *s)
+{
+	int edi; /* dummy output: scasb advances %edi */
+	int ecx; /* starts at -1; decremented once per byte scanned, NUL included */
+	__asm__ __volatile__(
+		"	repne; scasb\n"
+	/*	"	notl	%0\n" */
+	/*	"	decl	%0\n" */
+		: "=c" (ecx), "=&D" (edi)
+		: "1" (s), "a" (0), "0" (0xffffffffu)
+		: "memory" /* the asm reads the string, which is not listed as an operand */
+	);
+	return ~ecx - 1; /* ecx == -(len + 2) here; this is the notl + decl shown above */
+}
+#if 0 /* disabled: +1108 bytes on shared i386 build with gcc 4.3.0 */
+#define strlen(s) inlined_strlen(s)
+#endif
+
+
+static __always_inline /* stpcpy(): copies src including its NUL; returns a pointer to the NUL stored in dest */
+char *inlined_stpcpy(char *dest, const char *src)
+{
+	char *esi, *edi; /* values of %esi/%edi after the loop */
+	int eax; /* scratch: lodsb loads each byte into %al */
+	__asm__ __volatile__(
+		"1:	lodsb\n" /* load the next source byte into %al */
+		"	stosb\n" /* store it to the destination */
+		"	testb	%%al, %%al\n"
+		"	jnz	1b\n" /* loop until the NUL has been copied */
+		: "=&S" (esi), "=&D" (edi), "=&a" (eax)
+		: "0" (src), "1" (dest)
+		: "memory"
+	);
+	return edi - 1; /* %edi is one past the stored NUL */
+}
+static __always_inline
+char *inlined_strcpy(char *dest, const char *src)
+{
+	/* Same copy as inlined_stpcpy(); strcpy returns the start of dest instead. */
+	return ((void)inlined_stpcpy(dest, src), dest);
+}
+#if 0 /* disabled: +562 bytes on shared i386 build with gcc 4.3.0 */
+#define stpcpy(dest, src) inlined_stpcpy(dest, src)
+#define strcpy(dest, src) inlined_strcpy(dest, src)
+#endif
+
+
+static __always_inline /* memchr(): first byte in s[0..count) equal to the low byte of c, or NULL */
+void *inlined_memchr(const void *s, int c, size_t count)
+{
+	void *edi; /* result: address of the match, or NULL */
+	int ecx; /* dummy output: scasb decrements %ecx */
+	/* Unfortunately, c gets loaded to %eax (wide insn), not %al */
+	__asm__ __volatile__(
+		"	jecxz	1f\n" /* count == 0: nothing to scan */
+		"	repne; scasb\n"
+		"	leal	-1(%%edi), %%edi\n" /* back to the byte just compared; lea leaves ZF intact */
+		"	je	2f\n" /* ZF set: that byte matched */
+		"1:\n"
+		"	xorl	%%edi, %%edi\n" /* no match: return NULL */
+		"2:\n"
+		: "=&D" (edi), "=&c" (ecx)
+		: "a" (c), "0" (s), "1" (count)
+		: "memory" /* the asm reads the buffer, which is not listed as an operand */
+	);
+	return edi;
+}
+static __always_inline /* memchr() for a compile-time-constant c (emitted as an immediate); match or NULL */
+void *inlined_memchr_const_c(const void *s, int c, size_t count)
+{
+	void *edi; /* result: address of the match, or NULL */
+	int ecx, eax; /* dummy outputs: %ecx is decremented, %al is loaded inside the asm */
+	__asm__ __volatile__(
+		"	jecxz	1f\n" /* count == 0: nothing to scan */
+		"	movb	%4, %%al\n" /* const c to %%al */
+		"	repne; scasb\n"
+		"	leal	-1(%%edi), %%edi\n" /* back to the byte just compared; lea leaves ZF intact */
+		"	je	2f\n" /* ZF set: that byte matched */
+		"1:\n"
+		"	xorl	%%edi, %%edi\n" /* no match: return NULL */
+		"2:\n"
+		: "=&D" (edi), "=&c" (ecx), "=&a" (eax)
+		: "0" (s), "i" (c), "1" (count)
+		: "memory" /* the asm reads the buffer, which is not listed as an operand */
+	);
+	return edi;
+}
+#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */
+#define memchr(s, c, count) ( \
+	__builtin_constant_p(c) /* constant c: pass its low byte for use as an immediate */ \
+	? inlined_memchr_const_c(s, (c) & 0xff, count) \
+	: inlined_memchr(s, c, count) \
+	)
+#endif
+
+#endif /* _LIBC_STRING_i386_H  */

Modified: trunk/uClibc/include/string.h
===================================================================
--- trunk/uClibc/include/string.h	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/include/string.h	2008-12-17 01:36:31 UTC (rev 24435)
@@ -378,7 +378,7 @@
 
 /* The following two functions are non-standard but necessary for non-32 bit
    platforms.  */
-#if 0 /*def	__USE_GNU*/
+# if 0 /*#ifdef __USE_GNU*/
 extern int ffsl (long int __l) __THROW __attribute__ ((__const__));
 #  ifdef __GNUC__
 __extension__ extern int ffsll (long long int __ll)
@@ -422,44 +422,44 @@
 
 #ifdef	__USE_GNU
 /* Compare S1 and S2 as strings holding name & indices/version numbers.  */
-#if 0
+# if 0
 extern int strverscmp (__const char *__s1, __const char *__s2)
      __THROW __attribute_pure__ __nonnull ((1, 2));
 libc_hidden_proto(strverscmp)
-#endif
+# endif
 
 /* Return a string describing the meaning of the signal number in SIG.  */
 extern char *strsignal (int __sig) __THROW;
 libc_hidden_proto(strsignal)
 
 /* Copy SRC to DEST, returning the address of the terminating '\0' in DEST.  */
-#if 0 /* uClibc: disabled */
+# if 0 /* uClibc: disabled */
 extern char *__stpcpy (char *__restrict __dest, __const char *__restrict __src)
      __THROW __nonnull ((1, 2));
-#endif
+# endif
 extern char *stpcpy (char *__restrict __dest, __const char *__restrict __src)
      __THROW __nonnull ((1, 2));
 libc_hidden_proto(stpcpy)
 
 /* Copy no more than N characters of SRC to DEST, returning the address of
    the last character written into DEST.  */
-#if 0 /* uClibc: disabled */
+# if 0 /* uClibc: disabled */
 extern char *__stpncpy (char *__restrict __dest,
 			__const char *__restrict __src, size_t __n)
      __THROW __nonnull ((1, 2));
-#endif
+# endif
 extern char *stpncpy (char *__restrict __dest,
 		      __const char *__restrict __src, size_t __n)
      __THROW __nonnull ((1, 2));
 libc_hidden_proto(stpncpy)
 
-#if 0							/* uClibc does not support strfry or memfrob. */
+# if 0			/* uClibc does not support strfry or memfrob. */
 /* Sautee STRING briskly.  */
 extern char *strfry (char *__string) __THROW __nonnull ((1));
 
 /* Frobnicate N bytes of S.  */
 extern void *memfrob (void *__s, size_t __n) __THROW __nonnull ((1));
-#endif
+# endif
 
 # ifndef basename
 /* Return the file name within directory of FILENAME.  We don't
@@ -469,7 +469,7 @@
 extern char *basename (__const char *__filename) __THROW __nonnull ((1));
 libc_hidden_proto(basename)
 # endif
-#endif
+#endif /* __USE_GNU */
 
 
 #ifdef	__USE_BSD
@@ -484,4 +484,11 @@
 
 __END_DECLS
 
-#endif /* string.h  */
+
+#ifdef UCLIBC_INTERNAL
+# if defined __i386__
+#  include <libc-string_i386.h>
+# endif
+#endif
+
+#endif /* string.h */

Modified: trunk/uClibc/libc/string/generic/memchr.c
===================================================================
--- trunk/uClibc/libc/string/generic/memchr.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/generic/memchr.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -25,14 +25,12 @@
 #include <stdlib.h>
 #include <limits.h>
 
-/* Experimentally off - libc_hidden_proto(memchr) */
-/* libc_hidden_proto(abort) */
-
 #include "memcopy.h"
 
 #define LONG_MAX_32_BITS 2147483647
 
 /* Search no more than N bytes of S for C.  */
+#undef memchr
 void *memchr (const void * s, int c_in, size_t n)
 {
   const unsigned char *char_ptr;

Modified: trunk/uClibc/libc/string/generic/mempcpy.c
===================================================================
--- trunk/uClibc/libc/string/generic/mempcpy.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/generic/mempcpy.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -8,9 +8,8 @@
 #include <string.h>
 
 #ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(mempcpy) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
 
+# undef mempcpy
 void *mempcpy (void *dstpp, const void *srcpp, size_t len)
 {
   memcpy(dstpp, srcpp, len);

Modified: trunk/uClibc/libc/string/i386/memcpy.c
===================================================================
--- trunk/uClibc/libc/string/i386/memcpy.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/i386/memcpy.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -32,7 +32,7 @@
 
 #include <string.h>
 
-/* Experimentally off - libc_hidden_proto(memcpy) */
+#undef memcpy
 void *memcpy(void * to, const void * from, size_t n)
 {
 	int d0, d1, d2;

Modified: trunk/uClibc/libc/string/i386/memset.c
===================================================================
--- trunk/uClibc/libc/string/i386/memset.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/i386/memset.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -33,6 +33,7 @@
 #include <string.h>
 
 /* Experimentally off - libc_hidden_proto(memset) */
+#undef memset
 void *memset(void *s, int c, size_t count)
 {
     int d0, d1;

Modified: trunk/uClibc/libc/string/i386/strcpy.c
===================================================================
--- trunk/uClibc/libc/string/i386/strcpy.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/i386/strcpy.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -32,7 +32,7 @@
 
 #include <string.h>
 
-/* Experimentally off - libc_hidden_proto(strcpy) */
+#undef strcpy
 char *strcpy(char * dest, const char * src)
 {
     int d0, d1, d2;

Modified: trunk/uClibc/libc/string/i386/strlen.c
===================================================================
--- trunk/uClibc/libc/string/i386/strlen.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/i386/strlen.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -32,7 +32,7 @@
 
 #include <string.h>
 
-/* Experimentally off - libc_hidden_proto(strlen) */
+#undef strlen
 size_t strlen(const char *s)
 {
     int d0;

Modified: trunk/uClibc/libc/string/memchr.c
===================================================================
--- trunk/uClibc/libc/string/memchr.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/memchr.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -10,6 +10,7 @@
 #ifdef WANT_WIDE
 # define Wmemchr wmemchr
 #else
+# undef memchr
 # define Wmemchr memchr
 #endif
 

Modified: trunk/uClibc/libc/string/mempcpy.c
===================================================================
--- trunk/uClibc/libc/string/mempcpy.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/mempcpy.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -12,6 +12,7 @@
 #ifdef WANT_WIDE
 # define Wmempcpy wmempcpy
 #else
+# undef mempcpy
 # define Wmempcpy mempcpy
 #endif
 

Modified: trunk/uClibc/libc/string/stpcpy.c
===================================================================
--- trunk/uClibc/libc/string/stpcpy.c	2008-12-17 01:31:29 UTC (rev 24434)
+++ trunk/uClibc/libc/string/stpcpy.c	2008-12-17 01:36:31 UTC (rev 24435)
@@ -10,7 +10,7 @@
 #ifdef WANT_WIDE
 # define Wstpcpy wcpcpy
 #else
-/* Experimentally off - libc_hidden_proto(stpcpy) */
+# undef stpcpy
 # define Wstpcpy stpcpy
 #endif
 




More information about the uClibc-cvs mailing list