[uClibc-cvs] uClibc/libc/string/frv Makefile, NONE, 1.1 memcpy.S, NONE, 1.1 memset.S, NONE, 1.1

Erik Andersen andersen at uclibc.org
Fri May 14 10:29:47 UTC 2004


Update of /var/cvs/uClibc/libc/string/frv
In directory nail:/tmp/cvs-serv21725/libc/string/frv

Added Files:
	Makefile memcpy.S memset.S 
Log Message:
Alexandre Oliva writes:

This patch introduces optimized versions of memcpy and memset for
frv.



--- NEW FILE: memset.S ---
/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells at redhat.com)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


        .text
        .p2align	4

###############################################################################
#
# void *memset(void *p, int ch, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
###############################################################################
        .globl		memset
        .type		memset,@function
	# Register usage, as read from the code below:
	#   gr8        - p on entry; never written here, so it still holds p at return
	#   gr9        - fill value on entry, masked down to its low 8 bits
	#   gr10       - count on entry
	#   gr4        - running store address
	#   gr5        - bytes still to be written
	#   gr6, gr7   - step sizes used by the conditional subtracts and indexed stores
	#   gr12, gr13 - fill byte replicated across a word; the pair feeds the cstdu stores
	#   cc7        - predicate guarding each conditional store (cc5 is a helper)
	#   icc3       - tracks whether the remaining count has reached zero
	# Every stage below is predicated rather than branched, and returns early
	# through beqlr as soon as the remaining count hits zero.
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; count == 0: nothing to do

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address is odd
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7			; CC7 = address bit 1 is set
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5			; CC5 = at least 2 bytes remain
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; store only if both conditions hold

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7			; CC7 = address bit 2 is set
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5			; CC5 = at least 4 bytes remain
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; store only if both conditions hold

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# (if an alignment store above was skipped because too few bytes remained,
	# the address may still be unaligned, but then none of the wider stores
	# below has its predicate satisfied either)
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; CC7 = at least 64 bytes remain
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; test for another full 64-byte pass
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7			; CC7 = at least 32 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6				; step for the 16-byte stage below
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; pre-compute the 16-byte test
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7			; CC7 = at least 16 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7			; CC7 = at least 8 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7				; step for the 4-byte stage below
	beqlr		icc3,#0

	# now do 4-byte remnant
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4			; GR4 now trails the next address by 4
	cknc		icc0,cc7			; CC7 = at least 4 bytes remain
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; pre-compute the 2-byte test
	beqlr		icc3,#0

	# now do 2-byte remnant
	setlos		#2,gr7
	addi.p		gr4,#2,gr4			; GR4 now trails the next address by 2
	cknc		icc1,cc7			; CC7 = at least 2 bytes remain
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; pre-compute the 1-byte test
	beqlr		icc3,#0

	# now do 1-byte remnant
	setlos		#0,gr7
	addi.p		gr4,#2,gr4			; remove the remaining bias of 2: GR4 = next address
	cknc		icc0,cc7			; CC7 = one byte remains
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
	.size		memset, .-memset

--- NEW FILE: Makefile ---
# Makefile for uClibc
#
# Copyright (C) 2004 Alexandre Oliva <aoliva at redhat.com>
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU Library General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
# details.
#
# You should have received a copy of the GNU Library General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

TOPDIR=../../../
include $(TOPDIR)Rules.mak

# Optimised frv string routines, kept as preprocessed assembly sources.
SSRC=memcpy.S memset.S
SOBJS=$(patsubst %.S,%.o, $(SSRC))
OBJS=$(SOBJS)

# None of these targets names a real file; declaring them phony keeps a stray
# file called "all", "ar-target" or "clean" from silently suppressing the rule.
.PHONY: all ar-target clean

all: $(OBJS) $(LIBC)

$(LIBC): ar-target

# Add the objects to the archive named by $(LIBC).
# NOTE(review): LIBC, AR, ARFLAGS, CC, CFLAGS and STRIPTOOL are presumably
# provided by Rules.mak - confirm.
ar-target: $(OBJS)
	$(AR) $(ARFLAGS) $(LIBC) $(OBJS)

# Build each object from its .S source, then strip local symbols and the
# .note/.comment sections from it.
$(SOBJS): %.o : %.S
	$(CC) $(CFLAGS) -c $< -o $@
	$(STRIPTOOL) -x -R .note -R .comment $*.o

clean:
	$(RM) *.[oa] *~ core


--- NEW FILE: memcpy.S ---
/* memcpy.S: optimised assembly memcpy
 *
 * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells at redhat.com)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


        .text
        .p2align	4

###############################################################################
#
# void *memcpy(void *to, const void *from, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the copy
#
###############################################################################
        .globl		memcpy
        .type		memcpy,@function
	# Register usage, as read from the code below:
	#   gr8  - to on entry; never written here, so it still holds to at return
	#   gr9  - from on entry; becomes the running source pointer
	#   gr10 - count on entry; counted down to zero by each copy loop
	#   gr3  - running destination pointer
	#   gr4  - first the OR of to, from and count (alignment probe), then data
	#   gr11 - step between successive update-form loads and stores
	#   gr5, gr6, gr7, gr12-gr15 - further data registers in the wider loops
	# The copy width is the largest of 32/16/8/4/2/1 that divides to, from and
	# count alike, so every loop below reaches a count of exactly zero.
	# Both pointers are moved back by one step before each loop because the
	# update-form accesses address @(pointer,step).
memcpy:
	or.p		gr8,gr9,gr4
	orcc		gr10,gr0,gr0,icc3
	or.p		gr10,gr4,gr4			; GR4 = to | from | count
	beqlr		icc3,#0				; count == 0: nothing to do

	# optimise based on best common alignment for to, from & count
	andicc.p	gr4,#0x1f,gr0,icc0
	setlos		#8,gr11				; step used by the 8, 16 and 32 byte loops
	andicc.p	gr4,#0x0f,gr0,icc1
	beq		icc0,#0,memcpy_32
	andicc.p	gr4,#0x07,gr0,icc0
	beq		icc1,#0,memcpy_16
	andicc.p	gr4,#0x03,gr0,icc1
	beq		icc0,#0,memcpy_8
	andicc.p	gr4,#0x01,gr0,icc0
	beq		icc1,#0,memcpy_4
	setlos.p	#1,gr11				; byte step; memcpy_2 reloads GR11 itself
	beq		icc0,#0,memcpy_2

	# do byte by byte copy
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	ldubu.p		@(gr9,gr11),gr4
	subicc		gr10,#1,gr10,icc0
	stbu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do halfword by halfword copy
memcpy_2:
	setlos		#2,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lduhu.p		@(gr9,gr11),gr4
	subicc		gr10,#2,gr10,icc0
	sthu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do word by word copy
memcpy_4:
	setlos		#4,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	ldu.p		@(gr9,gr11),gr4
	subicc		gr10,#4,gr10,icc0
	stu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do double-word by double-word copy
memcpy_8:
	sub.p		gr8,gr11,gr3			; GR11 is still 8 from the dispatch above
	sub		gr9,gr11,gr9
0:	lddu.p		@(gr9,gr11),gr4
	subicc		gr10,#8,gr10,icc0
	stdu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do quad-word by quad-word copy
memcpy_16:
	sub.p		gr8,gr11,gr3			; GR11 is still 8 from the dispatch above
	sub		gr9,gr11,gr9
0:	lddu		@(gr9,gr11),gr4
	lddu.p		@(gr9,gr11),gr6
	subicc		gr10,#16,gr10,icc0
	stdu		gr4,@(gr3,gr11)
	stdu.p		gr6,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do eight-word by eight-word copy
memcpy_32:
	sub.p		gr8,gr11,gr3			; GR11 is still 8 from the dispatch above
	sub		gr9,gr11,gr9
0:	lddu		@(gr9,gr11),gr4
	lddu		@(gr9,gr11),gr6
	lddu		@(gr9,gr11),gr12
	lddu.p		@(gr9,gr11),gr14
	subicc		gr10,#32,gr10,icc0
	stdu		gr4,@(gr3,gr11)
	stdu		gr6,@(gr3,gr11)
	stdu		gr12,@(gr3,gr11)
	stdu.p		gr14,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	.size		memcpy, .-memcpy




More information about the uClibc-cvs mailing list