diff options
author | John Bowler <jbowler@nslu2-linux.org> | 2005-09-08 18:08:45 +0000 |
---|---|---|
committer | OpenEmbedded Project <openembedded-devel@lists.openembedded.org> | 2005-09-08 18:08:45 +0000 |
commit | fbe4ea88ca8ace2dbd766905c38a34d6e78c9704 (patch) | |
tree | b78db23a02de2794eed3715164b14348f032bafe | |
parent | 4bc4d1b947d2f371f236356da6a3bdc373d1f08e (diff) |
uclibc: put ARM thumb patches from 0.9.27 into 0.9.28
Some of the thumb patches had already been pushed into the uclibc release,
this commit adds corrected/updated versions of most of the ones which had
not been accepted. The controversial 'change the ldso symbol resolver'
patch is not in this set - it will be re-worked to match the changes being
made in binutils for the same problem.
-rw-r--r-- | packages/uclibc/uclibc-0.9.28/thumb-defined-arm-or-thumb.patch | 37 | ||||
-rw-r--r-- | packages/uclibc/uclibc-0.9.28/thumb-mov-pc-bx.patch | 102 | ||||
-rw-r--r-- | packages/uclibc/uclibc-0.9.28/thumb-resolve.patch | 172 | ||||
-rw-r--r-- | packages/uclibc/uclibc_0.9.28.bb | 9 |
4 files changed, 319 insertions, 1 deletions
diff --git a/packages/uclibc/uclibc-0.9.28/thumb-defined-arm-or-thumb.patch b/packages/uclibc/uclibc-0.9.28/thumb-defined-arm-or-thumb.patch new file mode 100644 index 0000000000..502c8db86f --- /dev/null +++ b/packages/uclibc/uclibc-0.9.28/thumb-defined-arm-or-thumb.patch @@ -0,0 +1,37 @@ +# in various places defined(__arm__) is used to protect/select code which +# is ARM specific, that code must also be selected for __thumb__ because +# __thumb__ is an ARM but __arm__ is not set... +# +--- ./ldso/include/dl-string.h.orig 2005-09-07 14:09:19.375564254 -0700 ++++ ./ldso/include/dl-string.h 2005-09-07 14:09:52.045620051 -0700 +@@ -270,7 +270,7 @@ + + /* On some arches constant strings are referenced through the GOT. + * This requires that load_addr must already be defined... */ +-#if defined(mc68000) || defined(__arm__) || defined(__mips__) \ ++#if defined(mc68000) || defined(__arm__) || defined(__thumb__) || defined(__mips__) \ + || defined(__sh__) || defined(__powerpc__) + # define CONSTANT_STRING_GOT_FIXUP(X) \ + if ((X) < (const char *) load_addr) (X) += load_addr +--- ./libc/sysdeps/linux/common/create_module.c.orig 2005-09-07 14:09:55.597843578 -0700 ++++ ./libc/sysdeps/linux/common/create_module.c 2005-09-07 14:10:11.650853730 -0700 +@@ -31,7 +31,7 @@ + + #ifdef __NR_create_module + +-#if defined(__i386__) || defined(__m68k__) || defined(__arm__) || defined(__cris__) || defined(__i960__) ++#if defined(__i386__) || defined(__m68k__) || defined(__arm__) || defined(__thumb__) || defined(__cris__) || defined(__i960__) + #define __NR___create_module __NR_create_module + #ifdef __STR_NR_create_module + #define __STR_NR___create_module __STR_NR_create_module +--- ./utils/ldd.c.orig 2005-09-07 14:10:32.368157388 -0700 ++++ ./utils/ldd.c 2005-09-07 14:11:23.735389724 -0700 +@@ -51,7 +51,7 @@ + #include <dmalloc.h> + #endif + +-#if defined(__arm__) ++#if defined(__arm__) || defined(__thumb__) + #define MATCH_MACHINE(x) (x == EM_ARM) + #define ELFCLASSM ELFCLASS32 + 
#endif diff --git a/packages/uclibc/uclibc-0.9.28/thumb-mov-pc-bx.patch b/packages/uclibc/uclibc-0.9.28/thumb-mov-pc-bx.patch new file mode 100644 index 0000000000..86713a7678 --- /dev/null +++ b/packages/uclibc/uclibc-0.9.28/thumb-mov-pc-bx.patch @@ -0,0 +1,102 @@ +# This patch changes all cases where the ARM assembler mov pc,rx +# instructions are used to ensure that the thumb/arm interwork change of +# processor mode works - in essence mov pc,rx needs to become bx rx. +# +# The ldr pc or ldm rx, {pc} instructions are not changed - this is +# fine on ARM >=v5 but will fail to restore thumb mode on ARM v4T, +# i.e. this code will not provide support for thumb on ARM v4T. +# +# One mov pc is left in resolve.S, this is fixed in a different patch - +# thumb-resolve.patch +# +# The changes are protected by __THUMB_INTERWORK__ - the original +# mov instruction will work on newer architectures and is required on +# arch v4 (not v4t) and earlier - those which did not support thumb - +# so this is safe. See gcc lib1asmfuncs for a more exact test. 
+# +--- uClibc-0.9.28/.pc/thumb-mov-pc-bx.patch/ldso/ldso/arm/dl-startup.h 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/ldso/ldso/arm/dl-startup.h 2005-09-08 09:34:22.918316874 -0700 +@@ -8,6 +8,7 @@ + " .text\n" + " .globl _start\n" + " .type _start,%function\n" ++ " .arm\n" + "_start:\n" + " @ at start time, all the args are on the stack\n" + " mov r0, sp\n" +@@ -40,7 +41,11 @@ + " ldr r0, .L_FINI_PROC\n" + " ldr r0, [sl, r0]\n" + " @ jump to the user_s entry point\n" ++#if defined(__THUMB_INTERWORK__) ++ " bx r6\n" ++#else + " mov pc, r6\n" ++#endif + ".L_GET_GOT:\n" + " .word _GLOBAL_OFFSET_TABLE_ - .L_GOT_GOT - 4\n" + ".L_SKIP_ARGS:\n" +--- uClibc-0.9.28/.pc/thumb-mov-pc-bx.patch/ldso/ldso/arm/dl-sysdep.h 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/ldso/ldso/arm/dl-sysdep.h 2005-09-07 20:10:35.923583424 -0700 +@@ -85,7 +85,19 @@ + extern void __dl_start asm ("_dl_start"); + Elf32_Addr got_addr = (Elf32_Addr) &__dl_start; + Elf32_Addr pcrel_addr; ++#if !defined(__thumb__) ++ /* On thumb this has to be two instructions because ++ * the offset is negative. ++ */ + asm ("adr %0, _dl_start" : "=r" (pcrel_addr)); ++#else ++ /* This is dumb, gcc should support a thumb adrl ++ * but it doesn't, so this is the same thing the ++ * hard way. If this code moves too far from _dl_start ++ * it will fail. 
++ */ ++ asm ("adr\t%0, 1f\n1:\tsub\t%0, #1b-_dl_start\n" : "=r" (pcrel_addr)); ++#endif + return pcrel_addr - got_addr; + } + +--- uClibc-0.9.28/.pc/thumb-mov-pc-bx.patch/libc/sysdeps/linux/arm/clone.S 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/libc/sysdeps/linux/arm/clone.S 2005-09-08 09:36:24.801986529 -0700 +@@ -51,7 +51,11 @@ + swi __NR_clone + movs a1, a1 + blt __error +- movne pc, lr ++#if defined(__THUMB_INTERWORK__) ++ bxne lr ++#else ++ movne pc, lr ++#endif + + @ pick the function arg and call address off the stack and execute + ldr r0, [sp, #4] +--- uClibc-0.9.28/.pc/thumb-mov-pc-bx.patch/libc/sysdeps/linux/arm/vfork.S 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/libc/sysdeps/linux/arm/vfork.S 2005-09-08 09:51:13.377901086 -0700 +@@ -34,7 +34,11 @@ + #ifdef __NR_vfork + swi __NR_vfork + cmn r0, #4096 ++#if defined(__THUMB_INTERWORK__) ++ bxcc lr ++#else + movcc pc, lr ++#endif + + /* Check if vfork even exists. */ + ldr r1, =-ENOSYS +@@ -47,7 +51,11 @@ + cmn r0, #4096 + + /* Syscal worked. Return to child/parent */ +- movcc pc, lr ++#if defined(__THUMB_INTERWORK__) ++ bxcc lr ++#else ++ movcc pc, lr ++#endif + + __error: + b __syscall_error diff --git a/packages/uclibc/uclibc-0.9.28/thumb-resolve.patch b/packages/uclibc/uclibc-0.9.28/thumb-resolve.patch new file mode 100644 index 0000000000..e088757590 --- /dev/null +++ b/packages/uclibc/uclibc-0.9.28/thumb-resolve.patch @@ -0,0 +1,172 @@ +# This change reimplements the ARM _dl_linux_resolve entry point - this is +# called to resolve DLL PLT entries. The assembler is changed to be thumb +# compatible and slightly faster, the C function, _dl_linux_resolver (note +# the extra r) is changed to take a byte address in place of an 8 byte +# count (faster in caller and callee, and slightly easier to understand). 
+# +--- uClibc-0.9.28/.pc/thumb-resolve.patch/ldso/ldso/arm/elfinterp.c 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/ldso/ldso/arm/elfinterp.c 2005-09-07 20:10:36.231602806 -0700 +@@ -55,7 +55,7 @@ + + rel_addr = (ELF_RELOC *) tpnt->dynamic_info[DT_JMPREL]; + +- this_reloc = rel_addr + (reloc_entry >> 3); ++ this_reloc = rel_addr + reloc_entry; + reloc_type = ELF32_R_TYPE(this_reloc->r_info); + symtab_index = ELF32_R_SYM(this_reloc->r_info); + +--- uClibc-0.9.28/.pc/thumb-resolve.patch/ldso/ldso/arm/resolve.S 2005-08-17 15:49:41.000000000 -0700 ++++ uClibc-0.9.28/ldso/ldso/arm/resolve.S 2005-09-08 09:54:03.536608499 -0700 +@@ -1,43 +1,121 @@ + /* +- * This function is _not_ called directly. It is jumped to (so no return +- * address is on the stack) when attempting to use a symbol that has not yet +- * been resolved. The first time a jump symbol (such as a function call inside +- * a shared library) is used (before it gets resolved) it will jump here to +- * _dl_linux_resolve. When we get called the stack looks like this: +- * reloc_entry +- * tpnt +- * +- * This function saves all the registers, puts a copy of reloc_entry and tpnt +- * on the stack (as function arguments) then make the function call +- * _dl_linux_resolver(tpnt, reloc_entry). _dl_linux_resolver() figures out +- * where the jump symbol is _really_ supposed to have jumped to and returns +- * that to us. Once we have that, we overwrite tpnt with this fixed up +- * address. We then clean up after ourselves, put all the registers back how we +- * found them, then we jump to the fixed up address, which is where the jump +- * symbol that got us here really wanted to jump to in the first place. +- * -Erik Andersen ++ * On ARM the PLT contains the following three instructions (for ARM calls): ++ * ++ * add ip, pc, #0xNN00000 ++ * add ip, ip, #0xNN000 ++ * ldr pc, [ip, #0xNNN]! 
++ * ++ * So that, effectively, causes the following to happen: ++ * ++ * ip := pc+0x0NNNNNNN ++ * pc := *ip ++ * ++ * For thumb the above fragment is preceded by "bx pc, nop" to switch to ARM ++ * mode and the thumb 'bl' must go to PLT-4 - the PLT entry is expanded by ++ * four bytes to accommodate the trampoline code. ++ * ++ * 0x0NNNNNNN is the offset of the GOT entry for this function relative to ++ * the PLT entry for this function (where the code is). So the code in the ++ * PLT causes a branch to whatever is in the GOT, leaving the actual address ++ * of the GOT entry in ip. (Note that the GOT must follow the PLT - the ++ * added value is 28 bit unsigned). ++ * ++ * ip is a pointer to the GOT entry for this function, the first time round ++ * *ip points to this code: ++ * ++ * str lr, [sp, #-4]! @ save lr ++ * ldr lr, [pc, #4] @ lr := *dat (&GOT_TABLE[0]-.) ++ * add lr, pc, lr @ lr += &dat (so lr == &GOT_TABLE[0]) ++ * ldr pc, [lr, #8]! @ pc := GOT_TABLE[2] ++ *dat: .long &GOT_TABLE[0] - . ++ * ++ * (this code is actually held in the first entry of the PLT). The code ++ * preserves lr then uses it as a scratch register (this preserves the ip ++ * value calculated above). GOT_TABLE[2] is initialized by INIT_GOT in ++ * dl-sysdep.h to point to _dl_linux_resolve - this function. The first ++ * three entries in the GOT are reserved, then they are followed by the ++ * entries for the PLT entries, in order. ++ * ++ * The linker initialises the following (non-reserved) GOT entries to ++ * the offset of the PLT with an associated relocation so that on load ++ * the entry is relocated to point to the PLT - the above code. 
++ * ++ * The net effect of all this is that on the first call to an external (as ++ * yet unresolved) function all seven of the above instructions are ++ * executed in sequence and the program ends up executing _dl_linux_resolve ++ * with the following important values in registers: ++ * ++ * ip - a pointer to the GOT entry for the as yet unresolved function ++ * lr - &GOT_TABLE[2] ++ * ++ * GOT_TABLE[2] has already been initialised to _dl_linux_resolve, and ++ * GOT_TABLE[1] is a pointer to the (elf_resolve*) from INIT_GOT. ++ * _dl_linux_resolve unfrobnicates the ip and lr values to obtain arguments ++ * for a call to _dl_linux_resolver (note the additional 'r' on the end) - ++ * this is in elfinterp.c in this directory. The call takes arguments: ++ * ++ * _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) ++ * ++ * And returns the address of the function, it also overwrites the GOT ++ * table entry so that the next time round only the first code fragment will ++ * be executed - it will call the function directly. ++ * ++ * [[Of course, this simply doesn't work on ARM 4T with a thumb target - because ++ * 4T did not do the thumb/arm change on ldr pc! It can be made to work by ++ * changing _dl_linux_resolver to return __%s_from_arm for an STT_TFUNC, but ++ * this hasn't been done, and there is no guarantee that the linker generated ++ * that glue anyway.]] ++ * ++ * _dl_linux_resolve gets the arguments to call the resolver as follows: ++ * ++ * tpnt GOT_TABLE[1], [lr-4] ++ * reloc-entry &GOT-&GOT_TABLE[3], (ip - lr - 4)/4 ++ * ++ * (I.e. 'GOT' means the table entry for this function, the thing for which ++ * ip holds the address.) The reloc-entry is passed as an index; ++ * since the GOT table has 4 byte entries the code needs to divide this by 4 ++ * to get the actual index. ++ * ++ * John Bowler, August 13, 2005 - determined by experiment and examination ++ * of generated ARM code (there was no documentation...) 
++ * ++ * This code is all ARM code - not thumb - _dl_linux_resolver may, itself, ++ * be thumb, in which case the linker will insert the appropriate glue. A ++ * call from thumb to the PLT hits the trampoline code described above. ++ * This code (now) builds a proper stack frame. ++ * ++ * The code does *not* set sb (r9,v6) - to do that the basic PLT instructions ++ * would need to save sb and load the new value and that would require ++ * support in the linker since it generates those instructions. (Also note ++ * that linux/uclibc seems to be using r10 - sl - as a PIC base register - see ++ * dl-startup.c). + */ + +-#define sl r10 +-#define fp r11 +-#define ip r12 +- + .text + .globl _dl_linux_resolve + .type _dl_linux_resolve,%function +-.align 4; ++.align 4 @ 16 byte boundary and there are 32 bytes below + + _dl_linux_resolve: +- stmdb sp!, {r0, r1, r2, r3, sl, fp} +- sub r1, ip, lr +- sub r1, r1, #4 +- add r1, r1, r1 +- ldr r0, [lr, #-4] +- mov r3,r0 ++ @ _dl_linux_resolver is a standard subroutine call, therefore it ++ @ preserves everything except r0-r3 (a1-a4), ip and lr. This ++ @ function must branch to the real function, and that expects ++ @ r0-r3 and lr to be as they were before the whole PLT stuff - ++ @ ip can be trashed. 
++ stmdb sp!, {r0-r3} ++ ldr r0, [lr, #-4] @ r0 := [lr-4] (GOT_TABLE[1]) ++ sub r1, lr, ip @ r1 := (lr-ip) (a multiple of 4) ++ mvn r1, r1, ASR #2 @ r1 := ~((lr-ip)>>2), since -x = (1+~x) ++ @ ~x = -x-1, therefore ~(r1>>2) = (-((lr-ip)>>2)-1) ++ @ = - ((lr-ip)/4) - 1 = (ip - lr - 4)/4, as required + + bl _dl_linux_resolver + +- mov ip, r0 +- ldmia sp!, {r0, r1, r2, r3, sl, fp, lr} +- mov pc,ip ++ mov ip, r0 ++ ldmia sp!, {r0-r3, lr} ++#if defined(__THUMB_INTERWORK__) ++ bx ip ++#else ++ mov pc, ip ++#endif + .size _dl_linux_resolve, .-_dl_linux_resolve diff --git a/packages/uclibc/uclibc_0.9.28.bb b/packages/uclibc/uclibc_0.9.28.bb index ae3d576e4d..246e9bc772 100644 --- a/packages/uclibc/uclibc_0.9.28.bb +++ b/packages/uclibc/uclibc_0.9.28.bb @@ -1,5 +1,5 @@ DEFAULT_PREFERENCE = "1" -PR = "r0" +PR = "r1" include uclibc.inc @@ -20,3 +20,10 @@ S = "${WORKDIR}/uClibc-${PV}" # be necessary to add this for architectures which do not initially # have a 'good' set of kernel header files in the cross directory. #SRC_URI += "file://nokernelheadercheck.patch;patch=1" +# +# Thumb support +SRC_URI += " file://thumb-defined-arm-or-thumb.patch;patch=1" +# +# Thumb interworking support +SRC_URI += " file://thumb-mov-pc-bx.patch;patch=1" +SRC_URI += " file://thumb-resolve.patch;patch=1" |