[PATCH] string/microblaze: Fix for little-endian

Steve Bennett steveb at workware.net.au
Wed Oct 31 23:43:41 UTC 2012


ping!

On 21/09/2012, at 3:38 PM, Steve Bennett wrote:

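> Fix the asm-optimised memcpy and memmove so they
> work for little-endian as well as big-endian.
> 
> The unaligned-copy loops build each destination word by
> shifting two adjacent source words and OR-ing them together.
> On little-endian (__MICROBLAZEEL__) the shift directions have
> to be reversed, so the bslli/bsrli instructions in those loops
> are replaced by BSLLI/BSRLI macros that swap the two
> instructions in that case and are unchanged for big-endian.
> 
> Testing has shown no issues, but I am not a microblaze
> asm expert, so YMMV.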
> 
> Signed-off-by: Steve Bennett <steveb at workware.net.au>
> ---
> libc/string/microblaze/memcpy.S  | 128 +++++++++++++++++++++------------------
> libc/string/microblaze/memmove.S | 128 +++++++++++++++++++++------------------
> 2 files changed, 136 insertions(+), 120 deletions(-)
> 
> diff --git a/libc/string/microblaze/memcpy.S b/libc/string/microblaze/memcpy.S
> index 7cf081e..f44f48e 100644
> --- a/libc/string/microblaze/memcpy.S
> +++ b/libc/string/microblaze/memcpy.S
> @@ -34,6 +34,14 @@
> 	.type  memcpy, @function
> 	.ent	memcpy
> 
> +#ifdef __MICROBLAZEEL__
> +	#define BSLLI bsrli
> +	#define BSRLI bslli
> +#else
> +	#define BSLLI bslli
> +	#define BSRLI bsrli
> +#endif
> +
> memcpy:
> fast_memcpy_ascending:
> 	/* move d to return register as value of function */
> @@ -85,48 +93,48 @@ a_block_unaligned:
> 	beqi	r9, a_block_u2		/* t1 was 2 => 2 byte offset */
> 
> a_block_u3:
> -	bslli	r11, r11, 24	/* h = h << 24 */
> +	BSLLI	r11, r11, 24	/* h = h << 24 */
> a_bu3_loop:
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 12) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	lwi	r12, r8, 32	/* v = *(as + 32) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	addi	r8, r8, 32	/* as = as + 32 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, a_bu3_loop	/* while (n) loop */
> @@ -134,48 +142,48 @@ a_bu3_loop:
> 	bri	a_block_done
> 
> a_block_u1:
> -	bslli	r11, r11, 8	/* h = h << 8 */
> +	BSLLI	r11, r11, 8	/* h = h << 8 */
> a_bu1_loop:
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 12) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	lwi	r12, r8, 32	/* v = *(as + 32) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	addi	r8, r8, 32	/* as = as + 32 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, a_bu1_loop	/* while (n) loop */
> @@ -183,48 +191,48 @@ a_bu1_loop:
> 	bri	a_block_done
> 
> a_block_u2:
> -	bslli	r11, r11, 16	/* h = h << 16 */
> +	BSLLI	r11, r11, 16	/* h = h << 16 */
> a_bu2_loop:
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 12) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	lwi	r12, r8, 32	/* v = *(as + 32) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	addi	r8, r8, 32	/* as = as + 32 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, a_bu2_loop	/* while (n) loop */
> @@ -263,13 +271,13 @@ a_word_unaligned:
> 	beqi	r9, a_word_u2		/* t1 was 2 => 2 byte offset */
> 
> a_word_u3:
> -	bslli	r11, r11, 24	/* h = h << 24 */
> +	BSLLI	r11, r11, 24	/* h = h << 24 */
> a_wu3_loop:
> 	lw	r12, r8, r10	/* v = *(as + offset) */
> -	bsrli	r9, r12, 8	/* t1 = v >> 8 */
> +	BSRLI	r9, r12, 8	/* t1 = v >> 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r10	/* *(d + offset) = t1 */
> -	bslli	r11, r12, 24	/* h = v << 24 */
> +	BSLLI	r11, r12, 24	/* h = v << 24 */
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	bneid	r4, a_wu3_loop	/* while (n) loop */
> 	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
> @@ -277,13 +285,13 @@ a_wu3_loop:
> 	bri	a_word_done
> 
> a_word_u1:
> -	bslli	r11, r11, 8	/* h = h << 8 */
> +	BSLLI	r11, r11, 8	/* h = h << 8 */
> a_wu1_loop:
> 	lw	r12, r8, r10	/* v = *(as + offset) */
> -	bsrli	r9, r12, 24	/* t1 = v >> 24 */
> +	BSRLI	r9, r12, 24	/* t1 = v >> 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r10	/* *(d + offset) = t1 */
> -	bslli	r11, r12, 8	/* h = v << 8 */
> +	BSLLI	r11, r12, 8	/* h = v << 8 */
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	bneid	r4, a_wu1_loop	/* while (n) loop */
> 	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
> @@ -291,13 +299,13 @@ a_wu1_loop:
> 	bri	a_word_done
> 
> a_word_u2:
> -	bslli	r11, r11, 16	/* h = h << 16 */
> +	BSLLI	r11, r11, 16	/* h = h << 16 */
> a_wu2_loop:
> 	lw	r12, r8, r10	/* v = *(as + offset) */
> -	bsrli	r9, r12, 16	/* t1 = v >> 16 */
> +	BSRLI	r9, r12, 16	/* t1 = v >> 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r10	/* *(d + offset) = t1 */
> -	bslli	r11, r12, 16	/* h = v << 16 */
> +	BSLLI	r11, r12, 16	/* h = v << 16 */
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	bneid	r4, a_wu2_loop	/* while (n) loop */
> 	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
> diff --git a/libc/string/microblaze/memmove.S b/libc/string/microblaze/memmove.S
> index 29233f5..28f8139 100644
> --- a/libc/string/microblaze/memmove.S
> +++ b/libc/string/microblaze/memmove.S
> @@ -33,6 +33,14 @@
> 	.type  memmove, @function
> 	.ent	memmove
> 
> +#ifdef __MICROBLAZEEL__
> +	#define BSLLI bsrli
> +	#define BSRLI bslli
> +#else
> +	#define BSLLI bslli
> +	#define BSRLI bsrli
> +#endif
> +
> memmove:
> 	cmpu	r4, r5, r6	/* n = s - d */
> 	bgei	r4, HIDDEN_JUMPTARGET(memcpy)
> @@ -112,150 +120,150 @@ d_block_unaligned:
> 	beqi	r9,d_block_u2		/* t1 was 2 => 2 byte offset */
> 
> d_block_u3:
> -	bsrli	r11, r11, 8	/* h = h >> 8 */
> +	BSRLI	r11, r11, 8	/* h = h >> 8 */
> d_bu3_loop:
> 	addi	r8, r8, -32	/* as = as - 32 */
> 	addi	r5, r5, -32	/* d = d - 32 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 112) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bsrli	r11, r12, 8	/* h = v >> 8 */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 */
> 	lwi	r12, r8, 0	/* v = *(as + 0) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, d_bu3_loop	/* while (n) loop */
> -	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
> 	bri	d_block_done
> 
> d_block_u1:
> -	bsrli	r11, r11, 24	/* h = h >> 24 */
> +	BSRLI	r11, r11, 24	/* h = h >> 24 */
> d_bu1_loop:
> 	addi	r8, r8, -32	/* as = as - 32 */
> 	addi	r5, r5, -32	/* d = d - 32 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 112) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bsrli	r11, r12, 24	/* h = v >> 24 */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 */
> 	lwi	r12, r8, 0	/* v = *(as + 0) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, d_bu1_loop	/* while (n) loop */
> -	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
> 	bri	d_block_done
> 
> d_block_u2:
> -	bsrli	r11, r11, 16	/* h = h >> 16 */
> +	BSRLI	r11, r11, 16	/* h = h >> 16 */
> d_bu2_loop:
> 	addi	r8, r8, -32	/* as = as - 32 */
> 	addi	r5, r5, -32	/* d = d - 32 */
> 	lwi	r12, r8, 28	/* v = *(as + 28) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 28	/* *(d + 28) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 24	/* v = *(as + 24) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 24	/* *(d + 24) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 20	/* v = *(as + 20) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 20	/* *(d + 20) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 16	/* v = *(as + 16) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 16	/* *(d + 16) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 12	/* v = *(as + 12) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 12	/* *(d + 112) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 8	/* v = *(as + 8) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 8	/* *(d + 8) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 4	/* v = *(as + 4) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 4	/* *(d + 4) = t1 */
> -	bsrli	r11, r12, 16	/* h = v >> 16 */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 */
> 	lwi	r12, r8, 0	/* v = *(as + 0) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	swi	r9, r5, 0	/* *(d + 0) = t1 */
> 	addi	r4, r4, -32	/* n = n - 32 */
> 	bneid	r4, d_bu2_loop	/* while (n) loop */
> -	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
> 
> d_block_done:
> 	addi	r4, r0, 4	/* n = 4 */
> @@ -290,41 +298,41 @@ d_word_unaligned:
> 	beqi	r9,d_word_u2		/* t1 was 2 => 2 byte offset */
> 
> d_word_u3:
> -	bsrli	r11, r11, 8	/* h = h >> 8 */
> +	BSRLI	r11, r11, 8	/* h = h >> 8 */
> d_wu3_loop:
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	lw	r12, r8, r4	/* v = *(as + n) */
> -	bslli	r9, r12, 24	/* t1 = v << 24 */
> +	BSLLI	r9, r12, 24	/* t1 = v << 24 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r4	/* *(d + n) = t1 */
> 	bneid	r4, d_wu3_loop	/* while (n) loop */
> -	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
> 
> 	bri	d_word_done
> 
> d_word_u1:
> -	bsrli	r11, r11, 24	/* h = h >> 24 */
> +	BSRLI	r11, r11, 24	/* h = h >> 24 */
> d_wu1_loop:
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	lw	r12, r8, r4	/* v = *(as + n) */
> -	bslli	r9, r12, 8	/* t1 = v << 8 */
> +	BSLLI	r9, r12, 8	/* t1 = v << 8 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r4	/* *(d + n) = t1 */
> 	bneid	r4, d_wu1_loop	/* while (n) loop */
> -	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
> 
> 	bri	d_word_done
> 
> d_word_u2:
> -	bsrli	r11, r11, 16	/* h = h >> 16 */
> +	BSRLI	r11, r11, 16	/* h = h >> 16 */
> d_wu2_loop:
> 	addi	r4, r4,-4	/* n = n - 4 */
> 	lw	r12, r8, r4	/* v = *(as + n) */
> -	bslli	r9, r12, 16	/* t1 = v << 16 */
> +	BSLLI	r9, r12, 16	/* t1 = v << 16 */
> 	or	r9, r11, r9	/* t1 = h | t1 */
> 	sw	r9, r5, r4	/* *(d + n) = t1 */
> 	bneid	r4, d_wu2_loop	/* while (n) loop */
> -	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
> +	BSRLI	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
> 
> d_word_done:
> 
> -- 
> 1.7.11.3
> 

--
Embedded Systems Specialists - http://workware.net.au/
WorkWare Systems Pty Ltd
W: www.workware.net.au      P: +61 434 921 300
E: steveb at workware.net.au   F: +61 7 3391 6002








More information about the uClibc mailing list