[git commit master 1/1] libm_sh: add optimised assembly implementation of lroundf and lrintf

Carmelo Amoroso carmelo.amoroso at st.com
Fri Dec 17 09:02:44 UTC 2010


commit: http://git.uclibc.org/uClibc/commit/?id=6ac247452e646c2187f2f559143c8c087b0542e0
branch: http://git.uclibc.org/uClibc/commit/?id=refs/heads/master

* libc/sysdeps/linux/sh/sysdep.h: Add LOCAL macro
* libm/sh/sh4/Makefile.arch: Include asm source in the build
* libm/sh/sh4/s_lrintf.S [NEW]: optimised asm lrintf
* libm/sh/sh4/s_lroundf.S [NEW]: optimised asm lroundf

Signed-off-by: Christian Bruel <christian.bruel at st.com>
Signed-off-by: Carmelo Amoroso <carmelo.amoroso at st.com>
---
 libc/sysdeps/linux/sh/sysdep.h |    1 +
 libm/sh/sh4/Makefile.arch      |    8 ++++--
 libm/sh/sh4/s_lrintf.S         |   52 ++++++++++++++++++++++++++++++++++++++++
 libm/sh/sh4/s_lroundf.S        |   39 ++++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 libm/sh/sh4/s_lrintf.S
 create mode 100644 libm/sh/sh4/s_lroundf.S

diff --git a/libc/sysdeps/linux/sh/sysdep.h b/libc/sysdeps/linux/sh/sysdep.h
index 2ef0a33..8b3c682 100644
--- a/libc/sysdeps/linux/sh/sysdep.h
+++ b/libc/sysdeps/linux/sh/sysdep.h
@@ -26,6 +26,7 @@
 
 /* Syntactic details of assembler.  */
 
+#define LOCAL(X)	.L_##X	/* Name of a file-local assembler label: expands to .L_<X>.  */
 #define ALIGNARG(log2) log2
 /* For ELF we need the `.type' directive to make shared libs work right.  */
 #define ASM_TYPE_DIRECTIVE(name,typearg) .type name,@##typearg;
diff --git a/libm/sh/sh4/Makefile.arch b/libm/sh/sh4/Makefile.arch
index 122d84d..e38e99c 100644
--- a/libm/sh/sh4/Makefile.arch
+++ b/libm/sh/sh4/Makefile.arch
@@ -7,11 +7,13 @@
 #
 
 ifeq ($(UCLIBC_HAS_FENV),y)
-libm_ARCH_SRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
-libm_ARCH_OBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC))
+libm_ARCH_CSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
+libm_ARCH_COBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_CSRC))
+libm_ARCH_SSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.S)
+libm_ARCH_SOBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.S,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SSRC))
 endif
 
-libm_ARCH_OBJS:=$(libm_ARCH_OBJ)
+libm_ARCH_OBJS:=$(libm_ARCH_COBJ) $(libm_ARCH_SOBJ)
 
 ifeq ($(DOPIC),y)
 libm-a-y+=$(libm_ARCH_OBJS:.o=.os)
diff --git a/libm/sh/sh4/s_lrintf.S b/libm/sh/sh4/s_lrintf.S
new file mode 100644
index 0000000..d8cec32
--- /dev/null
+++ b/libm/sh/sh4/s_lrintf.S
@@ -0,0 +1,52 @@
+/* Round argument to nearest integer value. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented into a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel at st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lrintf)	/* long lrintf(float): reads the argument from fr5, leaves the result in r0; ties go to the even integer */
+	mov	#0,r0
+	sts	fpscr,r3	/* r3 = caller's FPSCR, put back in the rts delay slot */
+	lds	r0,fpscr	/* FPSCR = 0 while we work; presumably single precision, round-to-nearest, traps off - confirm against the SH-4 manual. NOTE(review): the caller's rounding mode is therefore not consulted */
+	flds	fr5,fpul
+	mov.l	LOCAL(mask),r1
+	sts	fpul,r2	/* r2 = raw bit pattern of the argument */
+	and	r2,r1	/* r1 = sign bit of the argument, all other bits cleared */
+	mov.l	LOCAL(midway),r2
+	or	r1,r2	/* r2 = bit pattern of 0.5 carrying the argument's sign */
+	lds	r2,fpul
+	fsts	fpul,fr2	/* fr2 = +0.5 or -0.5 */
+	fadd	fr2,fr5	/* fr5 = x + (+/-)0.5. NOTE(review): this sum is itself rounded to single precision; odd integral inputs of magnitude >= 2^23 appear to end up on the neighbouring even integer - confirm */
+	ftrc	fr5,fpul	/* truncate the biased value toward zero */
+	sts	fpul,r0	/* r0 = candidate result */
+	float	fpul,fr2	/* fr2 = candidate converted back to float */
+	fcmp/eq	fr5,fr2	/* T = 1 when the biased value had no fractional part, i.e. x sat on a .5 boundary */
+	bf/s	0f	/* not on a boundary: the candidate is the answer */
+	mov	#1,r2	/* delay slot, executed whether or not the branch is taken */
+	tst	r1,r1	/* T = 1 when the sign bit is clear */
+	and	r0,r2	/* r2 = least significant bit of the candidate */
+	movt	r1	/* r1 = 1 for a non-negative argument, 0 for a negative one */
+	shal	r1	/* r1 = 2 or 0 */
+	tst	r2,r2	/* T = 1 when the candidate is even */
+	add	#-1,r1	/* r1 = +1 (non-negative argument) or -1 (negative argument) */
+	bt	0f	/* even candidate: keep it */
+	sub	r1,r0	/* odd candidate: move one step toward zero, onto the even neighbour */
+0:
+	rts
+	lds	r3,fpscr	/* delay slot: restore the caller's FPSCR */
+
+	.align 2
+LOCAL(mask):
+	.long	0x80000000	/* sign bit of a 32-bit float */
+LOCAL(midway):
+	.long	1056964608	/* 0x3f000000, the bit pattern of 0.5f */
+
+END(lrintf)
diff --git a/libm/sh/sh4/s_lroundf.S b/libm/sh/sh4/s_lroundf.S
new file mode 100644
index 0000000..fda3a4b
--- /dev/null
+++ b/libm/sh/sh4/s_lroundf.S
@@ -0,0 +1,39 @@
+/* Round argument to nearest integer value, halfway cases away from zero. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented into a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel at st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lroundf)	/* long lroundf(float): reads the argument from fr5, leaves the result in r0; adds a sign-matched 0.5 and truncates */
+	mov	#0,r0
+	sts	fpscr,r3	/* r3 = caller's FPSCR, put back in the rts delay slot */
+	lds	r0,fpscr	/* FPSCR = 0 while we work; presumably single precision, round-to-nearest, traps off - confirm against the SH-4 manual */
+	flds	fr5,fpul
+	mov.l	LOCAL(mask),r1
+	sts	fpul,r2	/* r2 = raw bit pattern of the argument */
+	and	r2,r1	/* r1 = sign bit of the argument, all other bits cleared */
+	mov.l	LOCAL(midway),r2
+	or	r1,r2	/* r2 = bit pattern of 0.5 carrying the argument's sign */
+	lds	r2,fpul
+	fsts	fpul,fr2	/* fr2 = +0.5 or -0.5 */
+	fadd	fr2,fr5	/* fr5 = x + (+/-)0.5. NOTE(review): this sum is itself rounded to single precision; the largest float below 0.5 (bits 0x3effffff) and odd integral inputs of magnitude >= 2^23 appear to come out one too large in magnitude - confirm */
+	ftrc	fr5,fpul	/* truncate the biased value toward zero */
+	sts	fpul,r0	/* r0 = result */
+	rts
+	lds	r3,fpscr	/* delay slot: restore the caller's FPSCR */
+
+	.align 2
+LOCAL(mask):
+	.long	0x80000000	/* sign bit of a 32-bit float */
+LOCAL(midway):
+	.long	1056964608	/* 0x3f000000, the bit pattern of 0.5f */
+
+END(lroundf)
-- 
1.7.2.2



More information about the uClibc-cvs mailing list