author    | Gerald Britton <gbritton@doomcom.org> | 2004-04-27 02:53:54 +0000
committer | Gerald Britton <gbritton@doomcom.org> | 2004-04-27 02:53:54 +0000
commit    | 5c76c320af6f82794823215c081469d3de8e64e4 (patch)
tree      | 324384db2b3a349459fb4fa28fc910448ec52ae7 /glibc/glibc-cvs/arm-memcpy.patch
parent    | f63c99c34855fff1cbb932457b08bdd29b3777ff (diff)
- binutils 2.15.90.0.3 / gcc 3.4.0 toolchain
- support in glibc and uclibc for gcc 3.4.0
- new target setup for uclibc: TARGET_OS=linux-uclibc, matching the uclibc
  project's own naming; modern config.sub already supports this triplet.
BKrev: 408dcb42JOaGKxGg3PSn6IwU4Kimfw
Diffstat (limited to 'glibc/glibc-cvs/arm-memcpy.patch')
-rw-r--r-- | glibc/glibc-cvs/arm-memcpy.patch | 756
1 file changed, 756 insertions, 0 deletions
diff --git a/glibc/glibc-cvs/arm-memcpy.patch b/glibc/glibc-cvs/arm-memcpy.patch index e69de29bb2..7fe0040bea 100644 --- a/glibc/glibc-cvs/arm-memcpy.patch +++ b/glibc/glibc-cvs/arm-memcpy.patch @@ -0,0 +1,756 @@ +--- /dev/null 2004-02-02 20:32:13.000000000 +0000 ++++ sysdeps/arm/memcpy.S 2004-03-20 13:25:27.000000000 +0000 +@@ -0,0 +1,241 @@ ++/* ++ * Optimized memcpy implementation for ARM processors ++ * ++ * Author: Nicolas Pitre ++ * Created: Dec 23, 2003 ++ * Copyright: (C) MontaVista Software, Inc. ++ * ++ * This file is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This file is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ */ ++ ++#include <sysdep.h> ++ ++ ++/* ++ * Endian independent macros for shifting bytes within registers. ++ */ ++#ifndef __ARMEB__ ++#define pull lsr ++#define push lsl ++#else ++#define pull lsl ++#define push lsr ++#endif ++ ++/* ++ * Enable data preload for architectures that support it (ARMv5 and above) ++ */ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) ++#endif ++ ++ ++/* char * memcpy (char *dst, const char *src) */ ++ ++ENTRY(memcpy) ++ subs r2, r2, #4 ++ stmfd sp!, {r0, r4, lr} ++ blt 7f ++ ands ip, r0, #3 ++ PLD( pld [r1, #0] ) ++ bne 8f ++ ands ip, r1, #3 ++ bne 9f ++ ++1: subs r2, r2, #4 ++ blt 6f ++ subs r2, r2, #8 ++ blt 5f ++ subs r2, r2, #16 ++ blt 4f ++ ++ PLD( subs r2, r2, #65 ) ++ stmfd sp!, {r5 - r8} ++ PLD( blt 3f ) ++ PLD( pld [r1, #32] ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, r1, #31 ) ++ PLD( pld [r1, #64] ) ++ PLD( beq 2f ) ++ PLD( rsb ip, ip, #32 ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #96] ) ++ PLD( blt 2f ) ++ PLD( cmp ip, #16 ) ++ PLD( sub r2, r2, ip ) ++ PLD( ldmgeia r1!, {r3 - r6} ) ++ PLD( stmgeia r0!, {r3 - r6} ) ++ PLD( beq 2f ) ++ PLD( and ip, ip, #15 ) ++ PLD( cmp ip, #8 ) ++ PLD( ldr r3, [r1], #4 ) ++ PLD( ldrge r4, [r1], #4 ) ++ PLD( ldrgt r5, [r1], #4 ) ++ PLD( str r3, [r0], #4 ) ++ PLD( strge r4, [r0], #4 ) ++ PLD( strgt r5, [r0], #4 ) ++ ++2: PLD( pld [r1, #96] ) ++3: ldmia r1!, {r3 - r8, ip, lr} ++ subs r2, r2, #32 ++ stmia r0!, {r3 - r8, ip, lr} ++ bge 2b ++ PLD( cmn r2, #65 ) ++ PLD( bge 3b ) ++ PLD( add r2, r2, #65 ) ++ tst r2, #31 ++ ldmfd sp!, {r5 - r8} ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ tst r2, #16 ++4: ldmneia r1!, {r3, r4, ip, lr} ++ stmneia r0!, {r3, r4, ip, lr} ++ ++ tst r2, #8 ++5: ldmneia r1!, {r3, r4} ++ stmneia r0!, {r3, r4} ++ ++ tst r2, #4 ++6: ldrne r3, [r1], #4 ++ strne r3, [r0], #4 ++ ++7: ands r2, r2, #3 ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ cmp r2, #2 ++ ldrb r3, [r1], #1 ++ ldrgeb r4, [r1], #1 ++ ldrgtb ip, [r1] ++ strb r3, [r0], #1 ++ strgeb r4, [r0], #1 ++ strgtb ip, [r0] ++ ldmfd sp!, {r0, r4, pc} ++ ++8: rsb ip, ip, #4 ++ cmp ip, #2 ++ ldrb r3, [r1], #1 ++ ldrgeb r4, [r1], #1 ++ ldrgtb lr, [r1], #1 ++ strb r3, [r0], #1 ++ strgeb r4, [r0], #1 ++ strgtb lr, [r0], #1 ++ subs r2, r2, ip ++ blt 7b ++ ands ip, r1, #3 ++ beq 1b ++ ++9: bic r1, r1, #3 ++ cmp ip, #2 ++ ldr lr, [r1], #4 ++ beq 17f ++ bgt 18f ++ ++ ++ .macro forward_copy_shift pull push ++ ++ cmp r2, #12 ++ PLD( pld [r1, #0] 
) ++ blt 15f ++ subs r2, r2, #28 ++ stmfd sp!, {r5 - r9} ++ blt 13f ++ ++ PLD( subs r2, r2, #97 ) ++ PLD( blt 12f ) ++ PLD( pld [r1, #32] ) ++ ++ PLD( @ cache alignment ) ++ PLD( rsb ip, r1, #36 ) ++ PLD( pld [r1, #64] ) ++ PLD( ands ip, ip, #31 ) ++ PLD( pld [r1, #96] ) ++ PLD( beq 11f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #128] ) ++ PLD( blt 11f ) ++ PLD( sub r2, r2, ip ) ++10: PLD( mov r3, lr, pull #\pull ) ++ PLD( ldr lr, [r1], #4 ) ++ PLD( subs ip, ip, #4 ) ++ PLD( orr r3, r3, lr, push #\push ) ++ PLD( str r3, [r0], #4 ) ++ PLD( bgt 10b ) ++ ++11: PLD( pld [r1, #128] ) ++12: mov r3, lr, pull #\pull ++ ldmia r1!, {r4 - r9, ip, lr} ++ subs r2, r2, #32 ++ orr r3, r3, r4, push #\push ++ mov r4, r4, pull #\pull ++ orr r4, r4, r5, push #\push ++ mov r5, r5, pull #\pull ++ orr r5, r5, r6, push #\push ++ mov r6, r6, pull #\pull ++ orr r6, r6, r7, push #\push ++ mov r7, r7, pull #\pull ++ orr r7, r7, r8, push #\push ++ mov r8, r8, pull #\pull ++ orr r8, r8, r9, push #\push ++ mov r9, r9, pull #\pull ++ orr r9, r9, ip, push #\push ++ mov ip, ip, pull #\pull ++ orr ip, ip, lr, push #\push ++ stmia r0!, {r3 - r9, ip} ++ bge 11b ++ PLD( cmn r2, #97 ) ++ PLD( bge 12b ) ++ PLD( add r2, r2, #97 ) ++ cmn r2, #16 ++ blt 14f ++13: mov r3, lr, pull #\pull ++ ldmia r1!, {r4 - r6, lr} ++ sub r2, r2, #16 ++ orr r3, r3, r4, push #\push ++ mov r4, r4, pull #\pull ++ orr r4, r4, r5, push #\push ++ mov r5, r5, pull #\pull ++ orr r5, r5, r6, push #\push ++ mov r6, r6, pull #\pull ++ orr r6, r6, lr, push #\push ++ stmia r0!, {r3 - r6} ++14: adds r2, r2, #28 ++ ldmfd sp!, {r5 - r9} ++ blt 16f ++15: mov r3, lr, pull #\pull ++ ldr lr, [r1], #4 ++ subs r2, r2, #4 ++ orr r3, r3, lr, push #\push ++ str r3, [r0], #4 ++ bge 15b ++16: ++ .endm ++ ++ ++ forward_copy_shift pull=8 push=24 ++ sub r1, r1, #3 ++ b 7b ++ ++17: forward_copy_shift pull=16 push=16 ++ sub r1, r1, #2 ++ b 7b ++ ++18: forward_copy_shift pull=24 push=8 ++ sub r1, r1, #1 ++ b 7b ++ ++ .size memcpy, . - memcpy ++END(memcpy) ++libc_hidden_builtin_def (memcpy) +--- /dev/null 2004-02-02 20:32:13.000000000 +0000 ++++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000 +@@ -0,0 +1,251 @@ ++/* ++ * Optimized memmove implementation for ARM processors ++ * ++ * Author: Nicolas Pitre ++ * Created: Dec 23, 2003 ++ * Copyright: (C) MontaVista Software, Inc. ++ * ++ * This file is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This file is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ */ ++ ++#include <sysdep.h> ++ ++ ++/* ++ * Endian independent macros for shifting bytes within registers. ++ */ ++#ifndef __ARMEB__ ++#define pull lsr ++#define push lsl ++#else ++#define pull lsl ++#define push lsr ++#endif ++ ++/* ++ * Enable data preload for architectures that support it (ARMv5 and above) ++ */ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) 
++#endif ++ ++ ++/* char * memmove (char *dst, const char *src) */ ++ENTRY(memmove) ++ subs ip, r0, r1 ++ cmphi r2, ip ++ bls memcpy(PLT) ++ ++ stmfd sp!, {r0, r4, lr} ++ add r1, r1, r2 ++ add r0, r0, r2 ++ subs r2, r2, #4 ++ blt 25f ++ ands ip, r0, #3 ++ PLD( pld [r1, #-4] ) ++ bne 26f ++ ands ip, r1, #3 ++ bne 27f ++ ++19: subs r2, r2, #4 ++ blt 24f ++ subs r2, r2, #8 ++ blt 23f ++ subs r2, r2, #16 ++ blt 22f ++ ++ PLD( pld [r1, #-32] ) ++ PLD( subs r2, r2, #96 ) ++ stmfd sp!, {r5 - r8} ++ PLD( blt 21f ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, r1, #31 ) ++ PLD( pld [r1, #-64] ) ++ PLD( beq 20f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #-96] ) ++ PLD( blt 20f ) ++ PLD( cmp ip, #16 ) ++ PLD( sub r2, r2, ip ) ++ PLD( ldmgedb r1!, {r3 - r6} ) ++ PLD( stmgedb r0!, {r3 - r6} ) ++ PLD( beq 20f ) ++ PLD( and ip, ip, #15 ) ++ PLD( cmp ip, #8 ) ++ PLD( ldr r3, [r1, #-4]! ) ++ PLD( ldrge r4, [r1, #-4]! ) ++ PLD( ldrgt r5, [r1, #-4]! ) ++ PLD( str r3, [r0, #-4]! ) ++ PLD( strge r4, [r0, #-4]! ) ++ PLD( strgt r5, [r0, #-4]! ) ++ ++20: PLD( pld [r1, #-96] ) ++ PLD( pld [r1, #-128] ) ++21: ldmdb r1!, {r3, r4, ip, lr} ++ subs r2, r2, #32 ++ stmdb r0!, {r3, r4, ip, lr} ++ ldmdb r1!, {r3, r4, ip, lr} ++ stmgedb r0!, {r3, r4, ip, lr} ++ ldmgedb r1!, {r3, r4, ip, lr} ++ stmgedb r0!, {r3, r4, ip, lr} ++ ldmgedb r1!, {r3, r4, ip, lr} ++ subges r2, r2, #32 ++ stmdb r0!, {r3, r4, ip, lr} ++ bge 20b ++ PLD( cmn r2, #96 ) ++ PLD( bge 21b ) ++ PLD( add r2, r2, #96 ) ++ tst r2, #31 ++ ldmfd sp!, {r5 - r8} ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ tst r2, #16 ++22: ldmnedb r1!, {r3, r4, ip, lr} ++ stmnedb r0!, {r3, r4, ip, lr} ++ ++ tst r2, #8 ++23: ldmnedb r1!, {r3, r4} ++ stmnedb r0!, {r3, r4} ++ ++ tst r2, #4 ++24: ldrne r3, [r1, #-4]! ++ strne r3, [r0, #-4]! ++ ++25: ands r2, r2, #3 ++ ldmeqfd sp!, {r0, r4, pc} ++ ++ cmp r2, #2 ++ ldrb r3, [r1, #-1] ++ ldrgeb r4, [r1, #-2] ++ ldrgtb ip, [r1, #-3] ++ strb r3, [r0, #-1] ++ strgeb r4, [r0, #-2] ++ strgtb ip, [r0, #-3] ++ ldmfd sp!, {r0, r4, pc} ++ ++26: cmp ip, #2 ++ ldrb r3, [r1, #-1]! ++ ldrgeb r4, [r1, #-1]! ++ ldrgtb lr, [r1, #-1]! ++ strb r3, [r0, #-1]! ++ strgeb r4, [r0, #-1]! ++ strgtb lr, [r0, #-1]! ++ subs r2, r2, ip ++ blt 25b ++ ands ip, r1, #3 ++ beq 19b ++ ++27: bic r1, r1, #3 ++ cmp ip, #2 ++ ldr r3, [r1] ++ beq 35f ++ blt 36f ++ ++ ++ .macro backward_copy_shift push pull ++ ++ cmp r2, #12 ++ PLD( pld [r1, #-4] ) ++ blt 33f ++ subs r2, r2, #28 ++ stmfd sp!, {r5 - r9} ++ blt 31f ++ ++ PLD( subs r2, r2, #96 ) ++ PLD( pld [r1, #-32] ) ++ PLD( blt 30f ) ++ PLD( pld [r1, #-64] ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, r1, #31 ) ++ PLD( pld [r1, #-96] ) ++ PLD( beq 29f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [r1, #-128] ) ++ PLD( blt 29f ) ++ PLD( sub r2, r2, ip ) ++28: PLD( mov r4, r3, push #\push ) ++ PLD( ldr r3, [r1, #-4]! ) ++ PLD( subs ip, ip, #4 ) ++ PLD( orr r4, r4, r3, pull #\pull ) ++ PLD( str r4, [r0, #-4]! 
) ++ PLD( bgt 28b ) ++ ++29: PLD( pld [r1, #-128] ) ++30: mov lr, r3, push #\push ++ ldmdb r1!, {r3 - r9, ip} ++ subs r2, r2, #32 ++ orr lr, lr, ip, pull #\pull ++ mov ip, ip, push #\push ++ orr ip, ip, r9, pull #\pull ++ mov r9, r9, push #\push ++ orr r9, r9, r8, pull #\pull ++ mov r8, r8, push #\push ++ orr r8, r8, r7, pull #\pull ++ mov r7, r7, push #\push ++ orr r7, r7, r6, pull #\pull ++ mov r6, r6, push #\push ++ orr r6, r6, r5, pull #\pull ++ mov r5, r5, push #\push ++ orr r5, r5, r4, pull #\pull ++ mov r4, r4, push #\push ++ orr r4, r4, r3, pull #\pull ++ stmdb r0!, {r4 - r9, ip, lr} ++ bge 29b ++ PLD( cmn r2, #96 ) ++ PLD( bge 30b ) ++ PLD( add r2, r2, #96 ) ++ cmn r2, #16 ++ blt 32f ++31: mov r7, r3, push #\push ++ ldmdb r1!, {r3 - r6} ++ sub r2, r2, #16 ++ orr r7, r7, r6, pull #\pull ++ mov r6, r6, push #\push ++ orr r6, r6, r5, pull #\pull ++ mov r5, r5, push #\push ++ orr r5, r5, r4, pull #\pull ++ mov r4, r4, push #\push ++ orr r4, r4, r3, pull #\pull ++ stmdb r0!, {r4 - r7} ++32: adds r2, r2, #28 ++ ldmfd sp!, {r5 - r9} ++ blt 34f ++33: mov r4, r3, push #\push ++ ldr r3, [r1, #-4]! ++ subs r2, r2, #4 ++ orr r4, r4, r3, pull #\pull ++ str r4, [r0, #-4]! ++ bge 33b ++34: ++ .endm ++ ++ ++ backward_copy_shift push=8 pull=24 ++ add r1, r1, #3 ++ b 25b ++ ++35: backward_copy_shift push=16 pull=16 ++ add r1, r1, #2 ++ b 25b ++ ++36: backward_copy_shift push=24 pull=8 ++ add r1, r1, #1 ++ b 25b ++ ++ .size memmove, . - memmove ++END(memmove) ++libc_hidden_builtin_def (memmove) +--- /dev/null 2004-02-02 20:32:13.000000000 +0000 ++++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000 +@@ -0,0 +1,255 @@ ++/* ++ * Optimized memmove implementation for ARM processors ++ * ++ * Author: Nicolas Pitre ++ * Created: Dec 23, 2003 ++ * Copyright: (C) MontaVista Software, Inc. ++ * ++ * This file is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This file is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ */ ++ ++#include <sysdep.h> ++ ++ ++/* ++ * Endian independent macros for shifting bytes within registers. ++ */ ++#ifndef __ARMEB__ ++#define pull lsr ++#define push lsl ++#else ++#define pull lsl ++#define push lsr ++#endif ++ ++/* ++ * Enable data preload for architectures that support it (ARMv5 and above) ++ */ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) 
++#endif ++ ++dst .req r1 ++src .req r0 ++ ++/* void *bcopy (const char *src, char *dst, size_t size) */ ++ENTRY(bcopy) ++ subs ip, dst, src ++ cmphi r2, ip ++ movls r3, r0 ++ movls r0, r1 ++ movls r1, r3 ++ bls memcpy(PLT) ++ ++ stmfd sp!, {r4, lr} ++ add src, src, r2 ++ add dst, dst, r2 ++ subs r2, r2, #4 ++ blt 25f ++ ands ip, dst, #3 ++ PLD( pld [src, #-4] ) ++ bne 26f ++ ands ip, src, #3 ++ bne 27f ++ ++19: subs r2, r2, #4 ++ blt 24f ++ subs r2, r2, #8 ++ blt 23f ++ subs r2, r2, #16 ++ blt 22f ++ ++ PLD( pld [src, #-32] ) ++ PLD( subs r2, r2, #96 ) ++ stmfd sp!, {r5 - r8} ++ PLD( blt 21f ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, src, #31 ) ++ PLD( pld [src, #-64] ) ++ PLD( beq 20f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [src, #-96] ) ++ PLD( blt 20f ) ++ PLD( cmp ip, #16 ) ++ PLD( sub r2, r2, ip ) ++ PLD( ldmgedb src!, {r3 - r6} ) ++ PLD( stmgedb dst!, {r3 - r6} ) ++ PLD( beq 20f ) ++ PLD( and ip, ip, #15 ) ++ PLD( cmp ip, #8 ) ++ PLD( ldr r3, [src, #-4]! ) ++ PLD( ldrge r4, [src, #-4]! ) ++ PLD( ldrgt r5, [src, #-4]! ) ++ PLD( str r3, [dst, #-4]! ) ++ PLD( strge r4, [dst, #-4]! ) ++ PLD( strgt r5, [dst, #-4]! ) ++ ++20: PLD( pld [src, #-96] ) ++ PLD( pld [src, #-128] ) ++21: ldmdb src!, {r3, r4, ip, lr} ++ subs r2, r2, #32 ++ stmdb dst!, {r3, r4, ip, lr} ++ ldmdb src!, {r3, r4, ip, lr} ++ stmgedb dst!, {r3, r4, ip, lr} ++ ldmgedb src!, {r3, r4, ip, lr} ++ stmgedb dst!, {r3, r4, ip, lr} ++ ldmgedb src!, {r3, r4, ip, lr} ++ subges r2, r2, #32 ++ stmdb dst!, {r3, r4, ip, lr} ++ bge 20b ++ PLD( cmn r2, #96 ) ++ PLD( bge 21b ) ++ PLD( add r2, r2, #96 ) ++ tst r2, #31 ++ ldmfd sp!, {r5 - r8} ++ ldmeqfd sp!, {r4, pc} ++ ++ tst r2, #16 ++22: ldmnedb src!, {r3, r4, ip, lr} ++ stmnedb dst!, {r3, r4, ip, lr} ++ ++ tst r2, #8 ++23: ldmnedb src!, {r3, r4} ++ stmnedb dst!, {r3, r4} ++ ++ tst r2, #4 ++24: ldrne r3, [src, #-4]! ++ strne r3, [dst, #-4]! ++ ++25: ands r2, r2, #3 ++ ldmeqfd sp!, {dst, r4, pc} ++ ++ cmp r2, #2 ++ ldrb r3, [src, #-1] ++ ldrgeb r4, [src, #-2] ++ ldrgtb ip, [src, #-3] ++ strb r3, [dst, #-1] ++ strgeb r4, [dst, #-2] ++ strgtb ip, [dst, #-3] ++ ldmfd sp!, {dst, r4, pc} ++ ++26: cmp ip, #2 ++ ldrb r3, [src, #-1]! ++ ldrgeb r4, [src, #-1]! ++ ldrgtb lr, [src, #-1]! ++ strb r3, [dst, #-1]! ++ strgeb r4, [dst, #-1]! ++ strgtb lr, [dst, #-1]! ++ subs r2, r2, ip ++ blt 25b ++ ands ip, src, #3 ++ beq 19b ++ ++27: bic src, src, #3 ++ cmp ip, #2 ++ ldr r3, [src] ++ beq 35f ++ blt 36f ++ ++ ++ .macro backward_copy_shift push pull ++ ++ cmp r2, #12 ++ PLD( pld [src, #-4] ) ++ blt 33f ++ subs r2, r2, #28 ++ stmfd sp!, {r5 - r9} ++ blt 31f ++ ++ PLD( subs r2, r2, #96 ) ++ PLD( pld [src, #-32] ) ++ PLD( blt 30f ) ++ PLD( pld [src, #-64] ) ++ ++ PLD( @ cache alignment ) ++ PLD( ands ip, src, #31 ) ++ PLD( pld [src, #-96] ) ++ PLD( beq 29f ) ++ PLD( cmp r2, ip ) ++ PLD( pld [src, #-128] ) ++ PLD( blt 29f ) ++ PLD( sub r2, r2, ip ) ++28: PLD( mov r4, r3, push #\push ) ++ PLD( ldr r3, [src, #-4]! ) ++ PLD( subs ip, ip, #4 ) ++ PLD( orr r4, r4, r3, pull #\pull ) ++ PLD( str r4, [dst, #-4]! 
) ++ PLD( bgt 28b ) ++ ++29: PLD( pld [src, #-128] ) ++30: mov lr, r3, push #\push ++ ldmdb src!, {r3 - r9, ip} ++ subs r2, r2, #32 ++ orr lr, lr, ip, pull #\pull ++ mov ip, ip, push #\push ++ orr ip, ip, r9, pull #\pull ++ mov r9, r9, push #\push ++ orr r9, r9, r8, pull #\pull ++ mov r8, r8, push #\push ++ orr r8, r8, r7, pull #\pull ++ mov r7, r7, push #\push ++ orr r7, r7, r6, pull #\pull ++ mov r6, r6, push #\push ++ orr r6, r6, r5, pull #\pull ++ mov r5, r5, push #\push ++ orr r5, r5, r4, pull #\pull ++ mov r4, r4, push #\push ++ orr r4, r4, r3, pull #\pull ++ stmdb dst!, {r4 - r9, ip, lr} ++ bge 29b ++ PLD( cmn r2, #96 ) ++ PLD( bge 30b ) ++ PLD( add r2, r2, #96 ) ++ cmn r2, #16 ++ blt 32f ++31: mov r7, r3, push #\push ++ ldmdb src!, {r3 - r6} ++ sub r2, r2, #16 ++ orr r7, r7, r6, pull #\pull ++ mov r6, r6, push #\push ++ orr r6, r6, r5, pull #\pull ++ mov r5, r5, push #\push ++ orr r5, r5, r4, pull #\pull ++ mov r4, r4, push #\push ++ orr r4, r4, r3, pull #\pull ++ stmdb dst!, {r4 - r7} ++32: adds r2, r2, #28 ++ ldmfd sp!, {r5 - r9} ++ blt 34f ++33: mov r4, r3, push #\push ++ ldr r3, [src, #-4]! ++ subs r2, r2, #4 ++ orr r4, r4, r3, pull #\pull ++ str r4, [dst, #-4]! ++ bge 33b ++34: ++ .endm ++ ++ ++ backward_copy_shift push=8 pull=24 ++ add src, src, #3 ++ b 25b ++ ++35: backward_copy_shift push=16 pull=16 ++ add src, src, #2 ++ b 25b ++ ++36: backward_copy_shift push=24 pull=8 ++ add src, src, #1 ++ b 25b ++ ++ .size bcopy, . - bcopy ++END(bcopy) |
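The interesting part of all three routines in this patch is the misaligned case, where source and destination do not share the same word alignment. Instead of falling back to byte copies, the code loads whole aligned words from the source and builds each destination word out of two neighbouring source words, using the endian-dependent `pull`/`push` shifts (lsr/lsl on little-endian ARM, swapped under `__ARMEB__`); the `forward_copy_shift`/`backward_copy_shift` macros are instantiated three times for the three possible byte offsets (pull=8/16/24). memmove picks its copy direction with a single unsigned compare and tail-calls memcpy whenever a forward copy is safe, and bcopy swaps its first two arguments before doing the same. The C sketch below is not part of the patch; it only restates those two ideas for a little-endian machine, with invented helper names (`copy_shifted`, `move_bytes`) used purely for illustration.

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Shift-and-merge forward copy, in the spirit of forward_copy_shift:
 * dst is word-aligned, src may be off by 1..3 bytes.  Little-endian is
 * assumed, so "pull" is a right shift and "push" a left shift, as in
 * the patch.  Like the assembly, this reads whole aligned source words,
 * which can touch a few bytes just outside the buffer (but never across
 * a page boundary); strictly portable C would byte-load the edges.
 */
static void copy_shifted(uint32_t *dst, const unsigned char *src, size_t nwords)
{
    size_t misalign = (uintptr_t)src & 3;
    if (misalign == 0) {                         /* aligned case: the patch just uses ldm/stm */
        memcpy(dst, src, nwords * 4);
        return;
    }

    const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~(uintptr_t)3);
    unsigned pull = 8 * (unsigned)misalign;      /* bits kept from the current word    */
    unsigned push = 32 - pull;                   /* bits taken from the following word */
    uint32_t cur = *s++;                         /* prime, like "ldr lr, [r1], #4"     */

    while (nwords--) {
        uint32_t next = *s++;
        *dst++ = (cur >> pull) | (next << push); /* "mov r3, lr, pull; orr r3, r3, rN, push" */
        cur = next;
    }
}

/*
 * memmove's direction test, done with one unsigned compare: if dst does
 * not point into [src, src + n) a plain forward memcpy is safe, otherwise
 * the regions overlap with dst above src and the copy must run backwards
 * ("subs ip, r0, r1; cmphi r2, ip; bls memcpy").
 */
static void move_bytes(unsigned char *dst, const unsigned char *src, size_t n)
{
    if ((uintptr_t)dst - (uintptr_t)src >= n) {
        memcpy(dst, src, n);
    } else {
        while (n--)                              /* copy from the end toward the start */
            dst[n] = src[n];
    }
}
```

bcopy is the same backward routine with the registers renamed via `.req` (src is r0, dst is r1) and the first two arguments swapped on the fall-through tail call to memcpy, which is why bcopy.S otherwise mirrors memmove.S almost line for line.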