summaryrefslogtreecommitdiff
path: root/packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch
diff options
context:
space:
mode:
Diffstat (limited to 'packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch')
-rw-r--r--packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch881
1 files changed, 881 insertions, 0 deletions
diff --git a/packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch b/packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch
new file mode 100644
index 0000000000..189cd09b67
--- /dev/null
+++ b/packages/qemu/qemu-0.9.0+cvs20070701/qemu-0.9.0-gcc4.patch
@@ -0,0 +1,881 @@
+diff -Naru qemu-neo1973.orig/dyngen.c qemu-neo1973/dyngen.c
+--- qemu-neo1973.orig/dyngen.c 2007-06-24 13:56:38.000000000 +0200
++++ qemu-neo1973/dyngen.c 2007-06-24 14:33:11.000000000 +0200
+@@ -32,6 +32,8 @@
+
+ #include "config-host.h"
+
++//#define DEBUG_OP
++
+ /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
+ compilation */
+ #if defined(CONFIG_WIN32)
+@@ -1429,6 +1431,644 @@
+ #endif
+
+
++#if defined(HOST_I386) || defined(HOST_X86_64)
++
++/* This byte is the first byte of an instruction. */
++#define FLAG_INSN (1 << 0)
++/* This byte has been processed as part of an instruction. */
++#define FLAG_SCANNED (1 << 1)
++/* This instruction is a return instruction. Gcc cometimes generates prefix
++ bytes, so may be more than one byte long. */
++#define FLAG_RET (1 << 2)
++/* This is either the target of a jump, or the preceeding instruction uses
++ a pc-relative offset. */
++#define FLAG_TARGET (1 << 3)
++/* This is a magic instruction that needs fixing up. */
++#define FLAG_EXIT (1 << 4)
++#define MAX_EXITS 5
++
++static void
++bad_opcode(const char *name, uint32_t op)
++{
++ error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name);
++}
++
++/* Mark len bytes as scanned, Returns insn_size + len. Reports an error
++ if these bytes have already been scanned. */
++static int
++eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
++{
++ while (len > 0) {
++ /* This should never occur in sane code. */
++ if (flags[insn + insn_size] & FLAG_SCANNED)
++ error ("Overlapping instructions in %s", name);
++ flags[insn + insn_size] |= FLAG_SCANNED;
++ insn_size++;
++ len--;
++ }
++ return insn_size;
++}
++
++static void
++trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn,
++ int len)
++{
++ uint8_t *ptr;
++ uint8_t op;
++ int modrm;
++ int is_prefix;
++ int op_size;
++ int addr_size;
++ int insn_size;
++ int is_ret;
++ int is_condjmp;
++ int is_jmp;
++ int is_exit;
++ int is_pcrel;
++ int immed;
++ int seen_rexw;
++ int32_t disp;
++
++ ptr = start_p + insn;
++ /* nonzero if this insn has a ModR/M byte. */
++ modrm = 1;
++ /* The size of the immediate value in this instruction. */
++ immed = 0;
++ /* The operand size. */
++ op_size = 4;
++ /* The address size */
++ addr_size = 4;
++ /* The total length of this instruction. */
++ insn_size = 0;
++ is_prefix = 1;
++ is_ret = 0;
++ is_condjmp = 0;
++ is_jmp = 0;
++ is_exit = 0;
++ seen_rexw = 0;
++ is_pcrel = 0;
++
++ while (is_prefix) {
++ op = ptr[insn_size];
++ insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++ is_prefix = 0;
++ switch (op >> 4) {
++ case 0:
++ case 1:
++ case 2:
++ case 3:
++ if (op == 0x0f) {
++ /* two-byte opcode. */
++ op = ptr[insn_size];
++ insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++ switch (op >> 4) {
++ case 0:
++ if ((op & 0xf) > 3)
++ modrm = 0;
++ break;
++ case 1: /* vector move or prefetch */
++ case 2: /* various moves and vector compares. */
++ case 4: /* cmov */
++ case 5: /* vector instructions */
++ case 6:
++ case 13:
++ case 14:
++ case 15:
++ break;
++ case 7: /* mmx */
++ if (op & 0x77) /* emms */
++ modrm = 0;
++ break;
++ case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
++ modrm = 0;
++ break;
++ case 8: /* long conditional jump */
++ is_condjmp = 1;
++ immed = op_size;
++ modrm = 0;
++ break;
++ case 9: /* setcc */
++ break;
++ case 10:
++ switch (op & 0x7) {
++ case 0: /* push fs/gs */
++ case 1: /* pop fs/gs */
++ case 2: /* cpuid/rsm */
++ modrm = 0;
++ break;
++ case 4: /* shld/shrd immediate */
++ immed = 1;
++ break;
++ default: /* Normal instructions with a ModR/M byte. */
++ break;
++ }
++ break;
++ case 11:
++ switch (op & 0xf) {
++ case 10: /* bt, bts, btr, btc */
++ immed = 1;
++ break;
++ default:
++ /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
++ undefined, and movsx */
++ break;
++ }
++ break;
++ case 12:
++ if (op & 8) {
++ /* bswap */
++ modrm = 0;
++ } else {
++ switch (op & 0x7) {
++ case 2:
++ case 4:
++ case 5:
++ case 6:
++ immed = 1;
++ break;
++ default:
++ break;
++ }
++ }
++ break;
++ }
++ } else if ((op & 0x07) <= 0x3) {
++ /* General arithmentic ax. */
++ } else if ((op & 0x07) <= 0x5) {
++ /* General arithmetic ax, immediate. */
++ if (op & 0x01)
++ immed = op_size;
++ else
++ immed = 1;
++ modrm = 0;
++ } else if ((op & 0x23) == 0x22) {
++ /* Segment prefix. */
++ is_prefix = 1;
++ } else {
++ /* Segment register push/pop or DAA/AAA/DAS/AAS. */
++ modrm = 0;
++ }
++ break;
++
++#if defined(HOST_X86_64)
++ case 4: /* rex prefix. */
++ is_prefix = 1;
++ /* The address/operand size is actually 64-bit, but the immediate
++ values in the instruction are still 32-bit. */
++ op_size = 4;
++ addr_size = 4;
++ if (op & 8)
++ seen_rexw = 1;
++ break;
++#else
++ case 4: /* inc/dec register. */
++#endif
++ case 5: /* push/pop general register. */
++ modrm = 0;
++ break;
++
++ case 6:
++ switch (op & 0x0f) {
++ case 0: /* pusha */
++ case 1: /* popa */
++ modrm = 0;
++ break;
++ case 2: /* bound */
++ case 3: /* arpl */
++ break;
++ case 4: /* FS */
++ case 5: /* GS */
++ is_prefix = 1;
++ break;
++ case 6: /* opcode size prefix. */
++ op_size = 2;
++ is_prefix = 1;
++ break;
++ case 7: /* Address size prefix. */
++ addr_size = 2;
++ is_prefix = 1;
++ break;
++ case 8: /* push immediate */
++ immed = op_size;
++ modrm = 0;
++ break;
++ case 10: /* push 8-bit immediate */
++ immed = 1;
++ modrm = 0;
++ break;
++ case 9: /* imul immediate */
++ immed = op_size;
++ break;
++ case 11: /* imul 8-bit immediate */
++ immed = 1;
++ break;
++ case 12: /* insb */
++ case 13: /* insw */
++ case 14: /* outsb */
++ case 15: /* outsw */
++ modrm = 0;
++ break;
++ }
++ break;
++
++ case 7: /* Short conditional jump. */
++ is_condjmp = 1;
++ immed = 1;
++ modrm = 0;
++ break;
++
++ case 8:
++ if ((op & 0xf) <= 3) {
++ /* arithmetic immediate. */
++ if ((op & 3) == 1)
++ immed = op_size;
++ else
++ immed = 1;
++ }
++ /* else test, xchg, mov, lea or pop general. */
++ break;
++
++ case 9:
++ /* Various single-byte opcodes with no modrm byte. */
++ modrm = 0;
++ if (op == 10) {
++ /* Call */
++ immed = 4;
++ }
++ break;
++
++ case 10:
++ switch ((op & 0xe) >> 1) {
++ case 0: /* mov absoliute immediate. */
++ case 1:
++ if (seen_rexw)
++ immed = 8;
++ else
++ immed = addr_size;
++ break;
++ case 4: /* test immediate. */
++ if (op & 1)
++ immed = op_size;
++ else
++ immed = 1;
++ break;
++ default: /* Various string ops. */
++ break;
++ }
++ modrm = 0;
++ break;
++
++ case 11: /* move immediate to register */
++ if (op & 8) {
++ if (seen_rexw)
++ immed = 8;
++ else
++ immed = op_size;
++ } else {
++ immed = 1;
++ }
++ modrm = 0;
++ break;
++
++ case 12:
++ switch (op & 0xf) {
++ case 0: /* shift immediate */
++ case 1:
++ immed = 1;
++ break;
++ case 2: /* ret immediate */
++ immed = 2;
++ modrm = 0;
++ bad_opcode(name, op);
++ break;
++ case 3: /* ret */
++ modrm = 0;
++ is_ret = 1;
++ case 4: /* les */
++ case 5: /* lds */
++ break;
++ case 6: /* mov immediate byte */
++ immed = 1;
++ break;
++ case 7: /* mov immediate */
++ immed = op_size;
++ break;
++ case 8: /* enter */
++ /* TODO: Is this right? */
++ immed = 3;
++ modrm = 0;
++ break;
++ case 10: /* retf immediate */
++ immed = 2;
++ modrm = 0;
++ bad_opcode(name, op);
++ break;
++ case 13: /* int */
++ immed = 1;
++ modrm = 0;
++ break;
++ case 11: /* retf */
++ case 15: /* iret */
++ modrm = 0;
++ bad_opcode(name, op);
++ break;
++ default: /* leave, int3 or into */
++ modrm = 0;
++ break;
++ }
++ break;
++
++ case 13:
++ if ((op & 0xf) >= 8) {
++ /* Coprocessor escape. For our purposes this is just a normal
++ instruction with a ModR/M byte. */
++ } else if ((op & 0xf) >= 4) {
++ /* AAM, AAD or XLAT */
++ modrm = 0;
++ }
++ /* else shift instruction */
++ break;
++
++ case 14:
++ switch ((op & 0xc) >> 2) {
++ case 0: /* loop or jcxz */
++ is_condjmp = 1;
++ immed = 1;
++ break;
++ case 1: /* in/out immed */
++ immed = 1;
++ break;
++ case 2: /* call or jmp */
++ switch (op & 3) {
++ case 0: /* call */
++ immed = op_size;
++ break;
++ case 1: /* long jump */
++ immed = 4;
++ is_jmp = 1;
++ break;
++ case 2: /* far jmp */
++ bad_opcode(name, op);
++ break;
++ case 3: /* short jmp */
++ immed = 1;
++ is_jmp = 1;
++ break;
++ }
++ break;
++ case 3: /* in/out register */
++ break;
++ }
++ modrm = 0;
++ break;
++
++ case 15:
++ switch ((op & 0xe) >> 1) {
++ case 0:
++ case 1:
++ is_prefix = 1;
++ break;
++ case 2:
++ case 4:
++ case 5:
++ case 6:
++ modrm = 0;
++ /* Some privileged insns are used as markers. */
++ switch (op) {
++ case 0xf4: /* hlt: Exit translation block. */
++ is_exit = 1;
++ break;
++ case 0xfa: /* cli: Jump to label. */
++ is_exit = 1;
++ immed = 4;
++ break;
++ case 0xfb: /* sti: TB patch jump. */
++ /* Mark the insn for patching, but continue sscanning. */
++ flags[insn] |= FLAG_EXIT;
++ immed = 4;
++ break;
++ }
++ break;
++ case 3: /* unary grp3 */
++ if ((ptr[insn_size] & 0x38) == 0) {
++ if (op == 0xf7)
++ immed = op_size;
++ else
++ immed = 1; /* test immediate */
++ }
++ break;
++ case 7: /* inc/dec grp4/5 */
++ /* TODO: This includes indirect jumps. We should fail if we
++ encounter one of these. */
++ break;
++ }
++ break;
++ }
++ }
++
++ if (modrm) {
++ if (addr_size != 4)
++ error("16-bit addressing mode used in %s", name);
++
++ disp = 0;
++ modrm = ptr[insn_size];
++ insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++ modrm &= 0xc7;
++ switch ((modrm & 0xc0) >> 6) {
++ case 0:
++ if (modrm == 5)
++ disp = 4;
++ break;
++ case 1:
++ disp = 1;
++ break;
++ case 2:
++ disp = 4;
++ break;
++ }
++ if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
++ /* SIB byte */
++ if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
++ disp = 4;
++ is_pcrel = 1;
++ }
++ insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++ }
++ insn_size = eat_bytes(name, flags, insn, insn_size, disp);
++ }
++ insn_size = eat_bytes(name, flags, insn, insn_size, immed);
++ if (is_condjmp || is_jmp) {
++ if (immed == 1) {
++ disp = (int8_t)*(ptr + insn_size - 1);
++ } else {
++ disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
++ | (((int32_t)*(ptr + insn_size - 2)) << 16)
++ | (((int32_t)*(ptr + insn_size - 3)) << 8)
++ | *(ptr + insn_size - 4);
++ }
++ disp += insn_size;
++ /* Jumps to external symbols point to the address of the offset
++ before relocation. */
++ /* ??? These are probably a tailcall. We could fix them up by
++ replacing them with jmp to EOB + call, but it's easier to just
++ prevent the compiler generating them. */
++ if (disp == 1)
++ error("Unconditional jump (sibcall?) in %s", name);
++ disp += insn;
++ if (disp < 0 || disp > len)
++ error("Jump outside instruction in %s", name);
++
++ if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
++ error("Overlapping instructions in %s", name);
++
++ flags[disp] |= (FLAG_INSN | FLAG_TARGET);
++ is_pcrel = 1;
++ }
++ if (is_pcrel) {
++ /* Mark the following insn as a jump target. This will stop
++ this instruction being moved. */
++ flags[insn + insn_size] |= FLAG_TARGET;
++ }
++ if (is_ret)
++ flags[insn] |= FLAG_RET;
++
++ if (is_exit)
++ flags[insn] |= FLAG_EXIT;
++
++ if (!(is_jmp || is_ret || is_exit))
++ flags[insn + insn_size] |= FLAG_INSN;
++}
++
++/* Scan a function body. Returns the position of the return sequence.
++ Sets *patch_bytes to the number of bytes that need to be copied from that
++ location. If no patching is required (ie. the return is the last insn)
++ *patch_bytes will be set to -1. *plen is the number of code bytes to copy.
++ */
++static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
++ int *patch_bytes, int *exit_addrs)
++{
++ char *flags;
++ int more;
++ int insn;
++ int retpos;
++ int bytes;
++ int num_exits;
++ int len;
++ int last_insn;
++
++ len = *plen;
++ flags = malloc(len + 1);
++ memset(flags, 0, len + 1);
++ flags[0] |= FLAG_INSN;
++ more = 1;
++ while (more) {
++ more = 0;
++ for (insn = 0; insn < len; insn++) {
++ if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
++ trace_i386_insn(name, start_p, flags, insn, len);
++ more = 1;
++ }
++ }
++ }
++
++ /* Strip any unused code at the end of the function. */
++ while (len > 0 && flags[len - 1] == 0)
++ len--;
++
++ retpos = -1;
++ num_exits = 0;
++ last_insn = 0;
++ for (insn = 0; insn < len; insn++) {
++ if (flags[insn] & FLAG_RET) {
++ /* ??? In theory it should be possible to handle multiple return
++ points. In practice it's not worth the effort. */
++ if (retpos != -1)
++ error("Multiple return instructions in %s", name);
++ retpos = insn;
++ }
++ if (flags[insn] & FLAG_EXIT) {
++ if (num_exits == MAX_EXITS)
++ error("Too many block exits in %s", name);
++ exit_addrs[num_exits] = insn;
++ num_exits++;
++ }
++ if (flags[insn] & FLAG_INSN)
++ last_insn = insn;
++ }
++
++ exit_addrs[num_exits] = -1;
++ if (retpos == -1) {
++ if (num_exits == 0) {
++ error ("No return instruction found in %s", name);
++ } else {
++ retpos = len;
++ last_insn = len;
++ }
++ }
++
++ /* If the return instruction is the last instruction we can just
++ remove it. */
++ if (retpos == last_insn)
++ *patch_bytes = -1;
++ else
++ *patch_bytes = 0;
++
++ /* Back up over any nop instructions. */
++ while (retpos > 0
++ && (flags[retpos] & FLAG_TARGET) == 0
++ && (flags[retpos - 1] & FLAG_INSN) != 0
++ && start_p[retpos - 1] == 0x90) {
++ retpos--;
++ }
++
++ if (*patch_bytes == -1) {
++ *plen = retpos;
++ free (flags);
++ return retpos;
++ }
++ *plen = len;
++
++ /* The ret is in the middle of the function. Find four more bytes that
++ so the ret can be replaced by a jmp. */
++ /* ??? Use a short jump where possible. */
++ bytes = 4;
++ insn = retpos + 1;
++ /* We can clobber everything up to the next jump target. */
++ while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
++ insn++;
++ bytes--;
++ }
++ if (bytes > 0) {
++ /* ???: Strip out nop blocks. */
++ /* We can't do the replacement without clobbering anything important.
++ Copy preceeding instructions(s) to give us some space. */
++ while (retpos > 0) {
++ /* If this byte is the target of a jmp we can't move it. */
++ if (flags[retpos] & FLAG_TARGET)
++ break;
++
++ (*patch_bytes)++;
++ bytes--;
++ retpos--;
++
++ /* Break out of the loop if we have enough space and this is either
++ the first byte of an instruction or a pad byte. */
++ if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
++ && bytes <= 0) {
++ break;
++ }
++ }
++ }
++
++ if (bytes > 0)
++ error("Unable to replace ret with jmp in %s\n", name);
++
++ free(flags);
++ return retpos;
++}
++
++#endif
++
+ #define MAX_ARGS 3
+
+ /* generate op code */
+@@ -1442,6 +2082,11 @@
+ uint8_t args_present[MAX_ARGS];
+ const char *sym_name, *p;
+ EXE_RELOC *rel;
++#if defined(HOST_I386) || defined(HOST_X86_64)
++ int patch_bytes;
++ int retpos;
++ int exit_addrs[MAX_EXITS];
++#endif
+
+ /* Compute exact size excluding prologue and epilogue instructions.
+ * Increment start_offset to skip epilogue instructions, then compute
+@@ -1452,33 +2097,12 @@
+ p_end = p_start + size;
+ start_offset = offset;
+ #if defined(HOST_I386) || defined(HOST_X86_64)
+-#ifdef CONFIG_FORMAT_COFF
+- {
+- uint8_t *p;
+- p = p_end - 1;
+- if (p == p_start)
+- error("empty code for %s", name);
+- while (*p != 0xc3) {
+- p--;
+- if (p <= p_start)
+- error("ret or jmp expected at the end of %s", name);
+- }
+- copy_size = p - p_start;
+- }
+-#else
+ {
+ int len;
+ len = p_end - p_start;
+- if (len == 0)
+- error("empty code for %s", name);
+- if (p_end[-1] == 0xc3) {
+- len--;
+- } else {
+- error("ret or jmp expected at the end of %s", name);
+- }
++ retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
+ copy_size = len;
+ }
+-#endif
+ #elif defined(HOST_PPC)
+ {
+ uint8_t *p;
+@@ -1710,6 +2334,13 @@
+ }
+
+ if (gen_switch == 2) {
++#if defined(HOST_I386) || defined(HOST_X86_64)
++ if (patch_bytes != -1)
++ copy_size += patch_bytes;
++#ifdef DEBUG_OP
++ copy_size += 2;
++#endif
++#endif
+ fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
+ } else if (gen_switch == 1) {
+
+@@ -1915,7 +2546,43 @@
+ #error unsupport object format
+ #endif
+ }
++ }
++ /* Replace the marker instructions with the actual opcodes. */
++ for (i = 0; exit_addrs[i] != -1; i++) {
++ int op;
++ switch (p_start[exit_addrs[i]])
++ {
++ case 0xf4: op = 0xc3; break; /* hlt -> ret */
++ case 0xfa: op = 0xe9; break; /* cli -> jmp */
++ case 0xfb: op = 0xe9; break; /* sti -> jmp */
++ default: error("Internal error");
++ }
++ fprintf(outfile,
++ " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
++ exit_addrs[i], op);
+ }
++ /* Fix up the return instruction. */
++ if (patch_bytes != -1) {
++ if (patch_bytes) {
++ fprintf(outfile, " memcpy(gen_code_ptr + %d,"
++ "gen_code_ptr + %d, %d);\n",
++ copy_size, retpos, patch_bytes);
++ }
++ fprintf(outfile,
++ " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
++ retpos);
++ fprintf(outfile,
++ " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
++ retpos + 1, copy_size - (retpos + 5));
++
++ copy_size += patch_bytes;
++ }
++#ifdef DEBUG_OP
++ fprintf(outfile,
++ " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
++ copy_size);
++ copy_size += 2;
++#endif
+ }
+ #elif defined(HOST_X86_64)
+ {
+@@ -1949,6 +2616,42 @@
+ }
+ }
+ }
++ /* Replace the marker instructions with the actual opcodes. */
++ for (i = 0; exit_addrs[i] != -1; i++) {
++ int op;
++ switch (p_start[exit_addrs[i]])
++ {
++ case 0xf4: op = 0xc3; break; /* hlt -> ret */
++ case 0xfa: op = 0xe9; break; /* cli -> jmp */
++ case 0xfb: op = 0xe9; break; /* sti -> jmp */
++ default: error("Internal error");
++ }
++ fprintf(outfile,
++ " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
++ exit_addrs[i], op);
++ }
++ /* Fix up the return instruction. */
++ if (patch_bytes != -1) {
++ if (patch_bytes) {
++ fprintf(outfile, " memcpy(gen_code_ptr + %d,"
++ "gen_code_ptr + %d, %d);\n",
++ copy_size, retpos, patch_bytes);
++ }
++ fprintf(outfile,
++ " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
++ retpos);
++ fprintf(outfile,
++ " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
++ retpos + 1, copy_size - (retpos + 5));
++
++ copy_size += patch_bytes;
++ }
++#ifdef DEBUG_OP
++ fprintf(outfile,
++ " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
++ copy_size);
++ copy_size += 2;
++#endif
+ }
+ #elif defined(HOST_PPC)
+ {
+diff -Naru qemu-neo1973.orig/dyngen-exec.h qemu-neo1973/dyngen-exec.h
+--- qemu-neo1973.orig/dyngen-exec.h 2007-06-24 13:56:38.000000000 +0200
++++ qemu-neo1973/dyngen-exec.h 2007-06-24 14:35:52.000000000 +0200
+@@ -194,7 +194,12 @@
+ #endif
+
+ /* force GCC to generate only one epilog at the end of the function */
++#if defined(__i386__) || defined(__x86_64__)
++/* Also add 4 bytes of padding so that we can replace the ret with a jmp. */
++#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
++#else
+ #define FORCE_RET() __asm__ __volatile__("" : : : "memory");
++#endif
+
+ #ifndef OPPROTO
+ #define OPPROTO
+@@ -244,11 +249,18 @@
+ #endif
+
+ #if defined(__i386__)
+-#define EXIT_TB() asm volatile ("ret")
+-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
++/* Dyngen will replace hlt instructions with a ret instruction. Inserting a
++ ret directly would confuse dyngen. */
++#define EXIT_TB() asm volatile ("hlt")
++/* Dyngen will replace cli with 0x9e (jmp).
++ We generate the offset manually. */
++#define GOTO_LABEL_PARAM(n) \
++ asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
+ #elif defined(__x86_64__)
+-#define EXIT_TB() asm volatile ("ret")
+-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
++/* The same as i386. */
++#define EXIT_TB() asm volatile ("hlt")
++#define GOTO_LABEL_PARAM(n) \
++ asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
+ #elif defined(__powerpc__)
+ #define EXIT_TB() asm volatile ("blr")
+ #define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n)
+diff -Naru qemu-neo1973.orig/exec-all.h qemu-neo1973/exec-all.h
+--- qemu-neo1973.orig/exec-all.h 2007-06-24 14:31:58.000000000 +0200
++++ qemu-neo1973/exec-all.h 2007-06-24 14:33:11.000000000 +0200
+@@ -329,14 +329,15 @@
+
+ #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
+
+-/* we patch the jump instruction directly */
++/* we patch the jump instruction directly. Use sti in place of the actual
++ jmp instruction so that dyngen can patch in the correct result. */
+ #define GOTO_TB(opname, tbparam, n)\
+ do {\
+ asm volatile (".section .data\n"\
+ ASM_OP_LABEL_NAME(n, opname) ":\n"\
+ ".long 1f\n"\
+ ASM_PREVIOUS_SECTION \
+- "jmp " ASM_NAME(__op_jmp) #n "\n"\
++ "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\
+ "1:\n");\
+ } while (0)
+
+diff -Naru qemu-neo1973.orig/target-ppc/exec.h qemu-neo1973/target-ppc/exec.h
+--- qemu-neo1973.orig/target-ppc/exec.h 2007-06-24 13:56:32.000000000 +0200
++++ qemu-neo1973/target-ppc/exec.h 2007-06-24 14:33:11.000000000 +0200
+@@ -66,11 +66,7 @@
+ #define FT1 (env->ft1)
+ #define FT2 (env->ft2)
+
+-#if defined (DEBUG_OP)
+-# define RETURN() __asm__ __volatile__("nop" : : : "memory");
+-#else
+-# define RETURN() __asm__ __volatile__("" : : : "memory");
+-#endif
++#define RETURN() FORCE_RET()
+
+ static inline target_ulong rotl8 (target_ulong i, int n)
+ {