summaryrefslogtreecommitdiff
path: root/packages/uclibc/uclibc-0.9.28/thumb-resolve.patch
blob: e088757590f2035b8ec0e98e8c93a47842a5f42c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# This change reimplements the ARM _dl_linux_resolve entry point - this is
# called to resolve DLL PLT entries.  The assembler is changed to be thumb
# compatible and slightly faster, the C function, _dl_linux_resolver (note
# the extra r) is changed to take a relocation index in place of a byte
# offset into the relocation table (faster in caller and callee, and
# slightly easier to understand).
#
--- uClibc-0.9.28/.pc/thumb-resolve.patch/ldso/ldso/arm/elfinterp.c	2005-08-17 15:49:41.000000000 -0700
+++ uClibc-0.9.28/ldso/ldso/arm/elfinterp.c	2005-09-07 20:10:36.231602806 -0700
@@ -55,7 +55,7 @@
 
 	rel_addr = (ELF_RELOC *) tpnt->dynamic_info[DT_JMPREL];
 
-	this_reloc = rel_addr + (reloc_entry >> 3);
+	this_reloc = rel_addr + reloc_entry;
 	reloc_type = ELF32_R_TYPE(this_reloc->r_info);
 	symtab_index = ELF32_R_SYM(this_reloc->r_info);
 
--- uClibc-0.9.28/.pc/thumb-resolve.patch/ldso/ldso/arm/resolve.S	2005-08-17 15:49:41.000000000 -0700
+++ uClibc-0.9.28/ldso/ldso/arm/resolve.S	2005-09-08 09:54:03.536608499 -0700
@@ -1,43 +1,121 @@
 /*
- * This function is _not_ called directly.  It is jumped to (so no return
- * address is on the stack) when attempting to use a symbol that has not yet
- * been resolved.  The first time a jump symbol (such as a function call inside
- * a shared library) is used (before it gets resolved) it will jump here to
- * _dl_linux_resolve.  When we get called the stack looks like this:
- *	reloc_entry
- *	tpnt
- *
- * This function saves all the registers, puts a copy of reloc_entry and tpnt
- * on the stack (as function arguments) then make the function call
- * _dl_linux_resolver(tpnt, reloc_entry).  _dl_linux_resolver() figures out
- * where the jump symbol is _really_ supposed to have jumped to and returns
- * that to us.  Once we have that, we overwrite tpnt with this fixed up
- * address. We then clean up after ourselves, put all the registers back how we
- * found them, then we jump to the fixed up address, which is where the jump
- * symbol that got us here really wanted to jump to in the first place.
- *  -Erik Andersen
+ * On ARM the PLT contains the following three instructions (for ARM calls):
+ *
+ *	add   ip, pc, #0xNN00000
+ *	add   ip, ip, #0xNN000
+ *	ldr   pc, [ip, #0xNNN]!
+ *
+ * So that, effectively, causes the following to happen:
+ *
+ *	ip := pc+0x0NNNNNNN
+ *	pc := *ip
+ *
+ * For thumb the above fragment is preceded by "bx pc, nop" to switch to ARM
+ * mode and the thumb 'bl' must go to PLT-4 - the PLT entry is expanded by
+ * four bytes to accommodate the trampoline code.
+ *
+ * 0x0NNNNNNN is the offset of the GOT entry for this function relative to
+ * the PLT entry for this function (where the code is).  So the code in the
+ * PLT causes a branch to whatever is in the GOT, leaving the actual address
+ * of the GOT entry in ip.  (Note that the GOT must follow the PLT - the
+ * added value is 28 bit unsigned).
+ *
+ * ip is a pointer to the GOT entry for this function, the first time round
+ * *ip points to this code:
+ *
+ *	str   lr, [sp, #-4]!	@ save lr
+ *	ldr   lr, [pc, #4]	@ lr := *dat (&GOT_TABLE[0]-.)
+ *	add   lr, pc, lr	@ lr += &dat (so lr == &GOT_TABLE[0])
+ *	ldr   pc, [lr, #8]!	@ pc := GOT_TABLE[2]
+ *dat:	.long &GOT_TABLE[0] - .
+ *
+ * (this code is actually held in the first entry of the PLT).  The code
+ * preserves lr then uses it as a scratch register (this preserves the ip
+ * value calculated above).  GOT_TABLE[2] is initialized by INIT_GOT in
+ * dl-sysdep.h to point to _dl_linux_resolve - this function.  The first
+ * three entries in the GOT are reserved, then they are followed by the
+ * entries for the PLT entries, in order.
+ *
+ * The linker initialises the following (non-reserved) GOT entries to
+ * the offset of the PLT with an associated relocation so that on load
+ * the entry is relocated to point to the PLT - the above code.
+ *
+ * The net effect of all this is that on the first call to an external (as
+ * yet unresolved) function all seven of the above instructions are
+ * executed in sequence and the program ends up executing _dl_linux_resolve
+ * with the following important values in registers:
+ *
+ *	ip - a pointer to the GOT entry for the as yet unresolved function
+ *	lr - &GOT_TABLE[2]
+ *
+ * GOT_TABLE[2] has already been initialised to _dl_linux_resolve, and
+ * GOT_TABLE[1] is a pointer to the (elf_resolve*) from INIT_GOT.
+ * _dl_linux_resolve unfrobnicates the ip and lr values to obtain arguments
+ * for a call to _dl_linux_resolver (note the additional 'r' on the end) -
+ * this is in elfinterp.c in this directory.  The call takes arguments:
+ *
+ *	_dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
+ *
+ * And returns the address of the function, it also overwrites the GOT
+ * table entry so that the next time round only the first code fragment will
+ * be executed - it will call the function directly.
+ *
+ * [[Of course, this simply doesn't work on ARM 4T with a thumb target - because
+ * 4T did not do the thumb/arm change on ldr pc!  It can be made to work by
+ * changing _dl_linux_resolver to return __%s_from_arm for an STT_TFUNC, but
+ * this hasn't been done, and there is no guarantee that the linker generated
+ * that glue anyway.]]
+ *
+ * _dl_linux_resolve gets the arguments to call the resolver as follows:
+ *
+ *	tpnt		GOT_TABLE[1], [lr-4]
+ *	reloc-entry	&GOT-&GOT_TABLE[3], (ip - lr - 4)/4
+ *
+ * (I.e. 'GOT' means the table entry for this function, the thing for which
+ * ip holds the address.)  The reloc-entry is passed as an index: since
+ * the GOT table has 4 byte entries the code needs to divide the byte
+ * offset (ip - lr - 4) by 4 to get the actual index.
+ *
+ * John Bowler, August 13, 2005 - determined by experiment and examination
+ * of generated ARM code (there was no documentation...)
+ *
+ * This code is all ARM code - not thumb - _dl_linux_resolver may, itself,
+ * be thumb, in which case the linker will insert the appropriate glue.  A
+ * call from thumb to the PLT hits the trampoline code described above.
+ * This code (now) builds a proper stack frame.
+ *
+ * The code does *not* set sb (r9,v6) - to do that the basic PLT instructions
+ * would need to save sb and load the new value and that would require
+ * support in the linker since it generates those instructions.  (Also note
+ * that linux/uclibc seems to be using r10 - sl - as a PIC base register - see
+ * dl-startup.c).
  */
 
-#define sl r10
-#define fp r11
-#define ip r12
-
 .text
 .globl _dl_linux_resolve
 .type _dl_linux_resolve,%function
-.align 4;
+.align 4	@ 16 byte boundary and there are 32 bytes below
 
 _dl_linux_resolve:
-	stmdb sp!, {r0, r1, r2, r3, sl, fp}
-	sub r1, ip, lr
-	sub r1, r1, #4
-	add r1, r1, r1
-	ldr r0, [lr, #-4]
-	mov r3,r0
+	@ _dl_linux_resolver is a standard subroutine call, so it preserves
+	@ everything except r0-r3 (a1-a4), ip and lr.  The real function
+	@ expects r0-r3 and lr as they were before the PLT code ran; ip can
+	@ be trashed.  The PLT code has pushed lr (one word), so an odd number
+	@ of words is pushed here to keep sp 8 byte aligned (EABI) at the bl.
+	stmdb sp!, {r0-r4}	@ save a1-a4; r4 is only alignment padding
+	ldr r0, [lr, #-4]	@ r0 := [lr-4] (GOT_TABLE[1])
+	sub r1, lr, ip		@ r1 := (lr-ip) (a multiple of 4)
+	mvn r1, r1, ASR #2	@ r1 := ~((lr-ip)>>2), since -x = (1+~x)
+				@ ~x = -x-1, therefore ~(r1>>2) = (-((lr-ip)>>2)-1)
+				@ = - ((lr-ip)/4) - 1 = (ip - lr - 4)/4, as required
 
 	bl _dl_linux_resolver
 
-	mov ip, r0
-	ldmia sp!, {r0, r1, r2, r3, sl, fp, lr}
-	mov pc,ip
+	mov   ip, r0		@ ip := resolved function address
+	ldmia sp!, {r0-r4, lr}	@ restore a1-a4, padding, and the lr the PLT pushed
+#if defined(__THUMB_INTERWORK__)
+	bx    ip
+#else
+	mov   pc, ip
+#endif
 .size _dl_linux_resolve, .-_dl_linux_resolve