diff options
author | Michael Lauer <mickey@vanille-media.de> | 2004-05-02 13:54:06 +0000 |
---|---|---|
committer | Michael Lauer <mickey@vanille-media.de> | 2004-05-02 13:54:06 +0000 |
commit | a85e69ba959784bec9a31ef4c9ca887dced281ae (patch) | |
tree | 7cc72527a897bae4b9e19d1966453166c727f315 /glibc | |
parent | 5437e012322b1f17c4d3e31d7b9c75d8ecfa76d1 (diff) |
Merge bk://openembedded@openembedded.bkbits.net/packages
into gandalf.tm.informatik.uni-frankfurt.de:/usr/local/projects/packages
2004/05/02 15:53:55+02:00 uni-frankfurt.de!mickeyl
update arm-memcpy.patch - patch courtesy pb_.
BKrev: 4094fd7eaZUdoxaCbF88ajKlkbsvTQ
Diffstat (limited to 'glibc')
-rw-r--r-- | glibc/glibc-cvs/arm-memcpy.patch | 490 |
1 files changed, 246 insertions, 244 deletions
diff --git a/glibc/glibc-cvs/arm-memcpy.patch b/glibc/glibc-cvs/arm-memcpy.patch index 7fe0040bea..bc2b3dab84 100644 --- a/glibc/glibc-cvs/arm-memcpy.patch +++ b/glibc/glibc-cvs/arm-memcpy.patch @@ -1,248 +1,4 @@ --- /dev/null 2004-02-02 20:32:13.000000000 +0000 -+++ sysdeps/arm/memcpy.S 2004-03-20 13:25:27.000000000 +0000 -@@ -0,0 +1,241 @@ -+/* -+ * Optimized memcpy implementation for ARM processors -+ * -+ * Author: Nicolas Pitre -+ * Created: Dec 23, 2003 -+ * Copyright: (C) MontaVista Software, Inc. -+ * -+ * This file is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This file is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ */ -+ -+#include <sysdep.h> -+ -+ -+/* -+ * Endian independent macros for shifting bytes within registers. -+ */ -+#ifndef __ARMEB__ -+#define pull lsr -+#define push lsl -+#else -+#define pull lsl -+#define push lsr -+#endif -+ -+/* -+ * Enable data preload for architectures that support it (ARMv5 and above) -+ */ -+#if defined(__ARM_ARCH_5__) || \ -+ defined(__ARM_ARCH_5T__) || \ -+ defined(__ARM_ARCH_5TE__) -+#define PLD(code...) code -+#else -+#define PLD(code...) 
-+#endif -+ -+ -+/* char * memcpy (char *dst, const char *src) */ -+ -+ENTRY(memcpy) -+ subs r2, r2, #4 -+ stmfd sp!, {r0, r4, lr} -+ blt 7f -+ ands ip, r0, #3 -+ PLD( pld [r1, #0] ) -+ bne 8f -+ ands ip, r1, #3 -+ bne 9f -+ -+1: subs r2, r2, #4 -+ blt 6f -+ subs r2, r2, #8 -+ blt 5f -+ subs r2, r2, #16 -+ blt 4f -+ -+ PLD( subs r2, r2, #65 ) -+ stmfd sp!, {r5 - r8} -+ PLD( blt 3f ) -+ PLD( pld [r1, #32] ) -+ -+ PLD( @ cache alignment ) -+ PLD( ands ip, r1, #31 ) -+ PLD( pld [r1, #64] ) -+ PLD( beq 2f ) -+ PLD( rsb ip, ip, #32 ) -+ PLD( cmp r2, ip ) -+ PLD( pld [r1, #96] ) -+ PLD( blt 2f ) -+ PLD( cmp ip, #16 ) -+ PLD( sub r2, r2, ip ) -+ PLD( ldmgeia r1!, {r3 - r6} ) -+ PLD( stmgeia r0!, {r3 - r6} ) -+ PLD( beq 2f ) -+ PLD( and ip, ip, #15 ) -+ PLD( cmp ip, #8 ) -+ PLD( ldr r3, [r1], #4 ) -+ PLD( ldrge r4, [r1], #4 ) -+ PLD( ldrgt r5, [r1], #4 ) -+ PLD( str r3, [r0], #4 ) -+ PLD( strge r4, [r0], #4 ) -+ PLD( strgt r5, [r0], #4 ) -+ -+2: PLD( pld [r1, #96] ) -+3: ldmia r1!, {r3 - r8, ip, lr} -+ subs r2, r2, #32 -+ stmia r0!, {r3 - r8, ip, lr} -+ bge 2b -+ PLD( cmn r2, #65 ) -+ PLD( bge 3b ) -+ PLD( add r2, r2, #65 ) -+ tst r2, #31 -+ ldmfd sp!, {r5 - r8} -+ ldmeqfd sp!, {r0, r4, pc} -+ -+ tst r2, #16 -+4: ldmneia r1!, {r3, r4, ip, lr} -+ stmneia r0!, {r3, r4, ip, lr} -+ -+ tst r2, #8 -+5: ldmneia r1!, {r3, r4} -+ stmneia r0!, {r3, r4} -+ -+ tst r2, #4 -+6: ldrne r3, [r1], #4 -+ strne r3, [r0], #4 -+ -+7: ands r2, r2, #3 -+ ldmeqfd sp!, {r0, r4, pc} -+ -+ cmp r2, #2 -+ ldrb r3, [r1], #1 -+ ldrgeb r4, [r1], #1 -+ ldrgtb ip, [r1] -+ strb r3, [r0], #1 -+ strgeb r4, [r0], #1 -+ strgtb ip, [r0] -+ ldmfd sp!, {r0, r4, pc} -+ -+8: rsb ip, ip, #4 -+ cmp ip, #2 -+ ldrb r3, [r1], #1 -+ ldrgeb r4, [r1], #1 -+ ldrgtb lr, [r1], #1 -+ strb r3, [r0], #1 -+ strgeb r4, [r0], #1 -+ strgtb lr, [r0], #1 -+ subs r2, r2, ip -+ blt 7b -+ ands ip, r1, #3 -+ beq 1b -+ -+9: bic r1, r1, #3 -+ cmp ip, #2 -+ ldr lr, [r1], #4 -+ beq 17f -+ bgt 18f -+ -+ -+ .macro forward_copy_shift pull push -+ 
-+ cmp r2, #12 -+ PLD( pld [r1, #0] ) -+ blt 15f -+ subs r2, r2, #28 -+ stmfd sp!, {r5 - r9} -+ blt 13f -+ -+ PLD( subs r2, r2, #97 ) -+ PLD( blt 12f ) -+ PLD( pld [r1, #32] ) -+ -+ PLD( @ cache alignment ) -+ PLD( rsb ip, r1, #36 ) -+ PLD( pld [r1, #64] ) -+ PLD( ands ip, ip, #31 ) -+ PLD( pld [r1, #96] ) -+ PLD( beq 11f ) -+ PLD( cmp r2, ip ) -+ PLD( pld [r1, #128] ) -+ PLD( blt 11f ) -+ PLD( sub r2, r2, ip ) -+10: PLD( mov r3, lr, pull #\pull ) -+ PLD( ldr lr, [r1], #4 ) -+ PLD( subs ip, ip, #4 ) -+ PLD( orr r3, r3, lr, push #\push ) -+ PLD( str r3, [r0], #4 ) -+ PLD( bgt 10b ) -+ -+11: PLD( pld [r1, #128] ) -+12: mov r3, lr, pull #\pull -+ ldmia r1!, {r4 - r9, ip, lr} -+ subs r2, r2, #32 -+ orr r3, r3, r4, push #\push -+ mov r4, r4, pull #\pull -+ orr r4, r4, r5, push #\push -+ mov r5, r5, pull #\pull -+ orr r5, r5, r6, push #\push -+ mov r6, r6, pull #\pull -+ orr r6, r6, r7, push #\push -+ mov r7, r7, pull #\pull -+ orr r7, r7, r8, push #\push -+ mov r8, r8, pull #\pull -+ orr r8, r8, r9, push #\push -+ mov r9, r9, pull #\pull -+ orr r9, r9, ip, push #\push -+ mov ip, ip, pull #\pull -+ orr ip, ip, lr, push #\push -+ stmia r0!, {r3 - r9, ip} -+ bge 11b -+ PLD( cmn r2, #97 ) -+ PLD( bge 12b ) -+ PLD( add r2, r2, #97 ) -+ cmn r2, #16 -+ blt 14f -+13: mov r3, lr, pull #\pull -+ ldmia r1!, {r4 - r6, lr} -+ sub r2, r2, #16 -+ orr r3, r3, r4, push #\push -+ mov r4, r4, pull #\pull -+ orr r4, r4, r5, push #\push -+ mov r5, r5, pull #\pull -+ orr r5, r5, r6, push #\push -+ mov r6, r6, pull #\pull -+ orr r6, r6, lr, push #\push -+ stmia r0!, {r3 - r6} -+14: adds r2, r2, #28 -+ ldmfd sp!, {r5 - r9} -+ blt 16f -+15: mov r3, lr, pull #\pull -+ ldr lr, [r1], #4 -+ subs r2, r2, #4 -+ orr r3, r3, lr, push #\push -+ str r3, [r0], #4 -+ bge 15b -+16: -+ .endm -+ -+ -+ forward_copy_shift pull=8 push=24 -+ sub r1, r1, #3 -+ b 7b -+ -+17: forward_copy_shift pull=16 push=16 -+ sub r1, r1, #2 -+ b 7b -+ -+18: forward_copy_shift pull=24 push=8 -+ sub r1, r1, #1 -+ b 7b -+ -+ .size 
memcpy, . - memcpy -+END(memcpy) -+libc_hidden_builtin_def (memcpy) ---- /dev/null 2004-02-02 20:32:13.000000000 +0000 +++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000 @@ -0,0 +1,251 @@ +/* @@ -754,3 +510,249 @@ + + .size bcopy, . - bcopy +END(bcopy) + +--- /dev/null 2004-02-02 20:32:13.000000000 +0000 ++++ sysdeps/arm/memcpy.S 2004-05-02 14:33:22.000000000 +0100 +@@ -0,0 +1,242 @@ ++/* ++ * Optimized memcpy implementation for ARM processors ++ * ++ * Author: Nicolas Pitre ++ * Created: Dec 23, 2003 ++ * Copyright: (C) MontaVista Software, Inc. ++ * ++ * This file is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This file is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ */ ++ ++#include <sysdep.h> ++ ++ ++/* ++ * Endian independent macros for shifting bytes within registers. ++ */ ++#ifndef __ARMEB__ ++#define pull lsr ++#define push lsl ++#else ++#define pull lsl ++#define push lsr ++#endif ++ ++/* ++ * Enable data preload (the PLD() wrapper) only when one of the three ARMv5 macros tested below is defined; otherwise PLD() expands to nothing. NOTE(review): other/newer architecture macros are not tested here -- confirm whether that is intended. ++ */ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) 
++#endif ++ ++ ++/* void *memcpy (void *dst, const void *src, size_t n) -- on entry r0 = dst, r1 = src, r2 = n (byte count); the entry value of r0 is saved on the stack and restored on every return path, so the original dst is the return value */ ++ ++ENTRY(memcpy) ++ subs r2, r2, #4 ++ stmfd sp!, {r0, r4, lr} ++ blt 7f ++ ands ip, r0, #3 ++ PLD( pld [r1, #0] ) ++ bne 8f ++ ands ip, r1, #3 ++ bne 9f ++ ++1: subs r2, r2, #4 ++ blt 6f ++ subs r2, r2, #8 ++ blt 5f ++ subs r2, r2, #16 ++ blt 4f ++ ++ PLD( subs r2, r2, #65 ) ++ stmfd sp!, {r5 - r8} ++ PLD( blt 3f ) ++ PLD( pld [r1, #32] ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, r1, #31 ) ++ PLD( pld [r1, #64] ) ++ PLD( beq 2f ) ++ PLD( rsb ip, ip, #32 ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #96] ) ++ PLD( blt 2f ) ++ PLD( cmp ip, #16 ) ++ PLD( sub r2, r2, ip ) ++ PLD( ldmgeia r1!, {r3 - r6} ) ++ PLD( stmgeia r0!, {r3 - r6} ) ++ PLD( beq 2f ) ++ PLD( and ip, ip, #15 ) ++ PLD( cmp ip, #8 ) ++ PLD( ldr r3, [r1], #4 ) ++ PLD( ldrge r4, [r1], #4 ) ++ PLD( ldrgt r5, [r1], #4 ) ++ PLD( str r3, [r0], #4 ) ++ PLD( strge r4, [r0], #4 ) ++ PLD( strgt r5, [r0], #4 ) ++ ++2: PLD( pld [r1, #96] ) ++3: ldmia r1!, {r3 - r8, ip, lr} ++ subs r2, r2, #32 ++ stmia r0!, {r3 - r8, ip, lr} ++ bge 2b ++ PLD( cmn r2, #65 ) ++ PLD( bge 3b ) ++ PLD( add r2, r2, #65 ) ++ tst r2, #31 ++ ldmfd sp!, {r5 - r8} ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ tst r2, #16 ++4: ldmneia r1!, {r3, r4, ip, lr} ++ stmneia r0!, {r3, r4, ip, lr} ++ ++ tst r2, #8 ++5: ldmneia r1!, {r3, r4} ++ stmneia r0!, {r3, r4} ++ ++ tst r2, #4 ++6: ldrne r3, [r1], #4 ++ strne r3, [r0], #4 ++ ++7: ands r2, r2, #3 ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ cmp r2, #2 ++ ldrb r3, [r1], #1 ++ ldrgeb r4, [r1], #1 ++ ldrgtb ip, [r1] ++ strb r3, [r0], #1 ++ strgeb r4, [r0], #1 ++ strgtb ip, [r0] ++ ldmfd sp!, {r0, r4, pc} ++ ++8: rsb ip, ip, #4 ++ cmp ip, #2 ++ ldrb r3, [r1], #1 ++ ldrgeb r4, [r1], #1 ++ ldrgtb lr, [r1], #1 ++ strb r3, [r0], #1 ++ strgeb r4, [r0], #1 ++ strgtb lr, [r0], #1 ++ subs r2, r2, ip ++ blt 7b ++ ands ip, r1, #3 ++ beq 1b ++ ++9: bic r1, r1, #3 ++ cmp ip, #2 ++ ldr lr, [r1], #4 ++ beq 17f ++ bgt 18f ++ ++ ++ .macro forward_copy_shift pull push ++ 
++ cmp r2, #12 ++ PLD( pld [r1, #0] ) ++ blt 15f ++ subs r2, r2, #28 ++ stmfd sp!, {r5 - r9} ++ blt 13f ++ ++ PLD( subs r2, r2, #97 ) ++ PLD( blt 12f ) ++ PLD( pld [r1, #32] ) ++ ++ PLD( @ cache alignment ) ++ PLD( rsb ip, r1, #36 ) ++ PLD( pld [r1, #64] ) ++ PLD( ands ip, ip, #31 ) ++ PLD( pld [r1, #96] ) ++ PLD( beq 11f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #128] ) ++ PLD( blt 11f ) ++ PLD( sub r2, r2, ip ) ++10: PLD( mov r3, lr, pull #\pull ) ++ PLD( ldr lr, [r1], #4 ) ++ PLD( subs ip, ip, #4 ) ++ PLD( orr r3, r3, lr, push #\push ) ++ PLD( str r3, [r0], #4 ) ++ PLD( bgt 10b ) ++ ++11: PLD( pld [r1, #128] ) ++12: mov r3, lr, pull #\pull ++ ldmia r1!, {r4 - r9, ip, lr} ++ subs r2, r2, #32 ++ orr r3, r3, r4, push #\push ++ mov r4, r4, pull #\pull ++ orr r4, r4, r5, push #\push ++ mov r5, r5, pull #\pull ++ orr r5, r5, r6, push #\push ++ mov r6, r6, pull #\pull ++ orr r6, r6, r7, push #\push ++ mov r7, r7, pull #\pull ++ orr r7, r7, r8, push #\push ++ mov r8, r8, pull #\pull ++ orr r8, r8, r9, push #\push ++ mov r9, r9, pull #\pull ++ orr r9, r9, ip, push #\push ++ mov ip, ip, pull #\pull ++ orr ip, ip, lr, push #\push ++ stmia r0!, {r3 - r9, ip} ++ bge 11b ++ PLD( cmn r2, #97 ) ++ PLD( bge 12b ) ++ PLD( add r2, r2, #97 ) ++ cmn r2, #16 ++ blt 14f ++13: mov r3, lr, pull #\pull ++ ldmia r1!, {r4 - r6, lr} ++ sub r2, r2, #16 ++ orr r3, r3, r4, push #\push ++ mov r4, r4, pull #\pull ++ orr r4, r4, r5, push #\push ++ mov r5, r5, pull #\pull ++ orr r5, r5, r6, push #\push ++ mov r6, r6, pull #\pull ++ orr r6, r6, lr, push #\push ++ stmia r0!, {r3 - r6} ++14: adds r2, r2, #28 ++ ldmfd sp!, {r5 - r9} ++ blt 16f ++15: mov r3, lr, pull #\pull ++ ldr lr, [r1], #4 ++ subs r2, r2, #4 ++ orr r3, r3, lr, push #\push ++ str r3, [r0], #4 ++ bge 15b ++16: ++ .endm ++ ++ ++ forward_copy_shift pull=8 push=24 ++ sub r1, r1, #3 ++ b 7b ++ ++17: forward_copy_shift pull=16 push=16 ++ sub r1, r1, #2 ++ b 7b ++ ++18: forward_copy_shift pull=24 push=8 ++ sub r1, r1, #1 ++ b 7b ++ ++ .size 
memcpy, . - memcpy ++END(memcpy) ++libc_hidden_builtin_def (memcpy) ++ |