diff -Naru qemu-neo1973.orig/dyngen.c qemu-neo1973/dyngen.c
--- qemu-neo1973.orig/dyngen.c	2007-06-24 13:56:38.000000000 +0200
+++ qemu-neo1973/dyngen.c	2007-06-24 14:33:11.000000000 +0200
@@ -32,6 +32,8 @@
 
 #include "config-host.h"
 
+//#define DEBUG_OP
+
 /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
    compilation */
 #if defined(CONFIG_WIN32)
@@ -1429,6 +1431,644 @@
 #endif
 
 
+#if defined(HOST_I386) || defined(HOST_X86_64)
+
+/* This byte is the first byte of an instruction.  */
+#define FLAG_INSN     (1 << 0)
+/* This byte has been processed as part of an instruction.  */
+#define FLAG_SCANNED  (1 << 1)
+/* This instruction is a return instruction.  Gcc cometimes generates prefix
+   bytes, so may be more than one byte long.  */
+#define FLAG_RET      (1 << 2)
+/* This is either the target of a jump, or the preceeding instruction uses
+   a pc-relative offset.  */
+#define FLAG_TARGET   (1 << 3)
+/* This is a magic instruction that needs fixing up.  */
+#define FLAG_EXIT     (1 << 4)
+#define MAX_EXITS     5
+
+static void
+bad_opcode(const char *name, uint32_t op)
+{
+    error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name);
+}
+
+/* Mark len bytes as scanned,  Returns insn_size + len.  Reports an error
+   if these bytes have already been scanned.  */
+static int
+eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
+{
+    while (len > 0) {
+        /* This should never occur in sane code.  */
+        if (flags[insn + insn_size] & FLAG_SCANNED)
+            error ("Overlapping instructions in %s", name);
+        flags[insn + insn_size] |= FLAG_SCANNED;
+        insn_size++;
+        len--;
+    }
+    return insn_size;
+}
+
+static void
+trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn,
+                 int len)
+{
+    uint8_t *ptr;
+    uint8_t op;
+    int modrm;
+    int is_prefix;
+    int op_size;
+    int addr_size;
+    int insn_size;
+    int is_ret;
+    int is_condjmp;
+    int is_jmp;
+    int is_exit;
+    int is_pcrel;
+    int immed;
+    int seen_rexw;
+    int32_t disp;
+
+    ptr = start_p + insn;
+    /* nonzero if this insn has a ModR/M byte.  */
+    modrm = 1;
+    /* The size of the immediate value in this instruction.  */
+    immed = 0;
+    /* The operand size.  */
+    op_size = 4;
+    /* The address size */
+    addr_size = 4;
+    /* The total length of this instruction.  */
+    insn_size = 0;
+    is_prefix = 1;
+    is_ret = 0;
+    is_condjmp = 0;
+    is_jmp = 0;
+    is_exit = 0;
+    seen_rexw = 0;
+    is_pcrel = 0;
+
+    while (is_prefix) {
+        op = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        is_prefix = 0;
+        switch (op >> 4) {
+        case 0:
+        case 1:
+        case 2:
+        case 3:
+            if (op == 0x0f) {
+                /* two-byte opcode.  */
+                op = ptr[insn_size];
+                insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+                switch (op >> 4) {
+                case 0:
+                    if ((op & 0xf) > 3)
+                      modrm = 0;
+                    break;
+                case 1: /* vector move or prefetch */
+                case 2: /* various moves and vector compares.  */
+                case 4: /* cmov */
+                case 5: /* vector instructions */
+                case 6:
+                case 13:
+                case 14:
+                case 15:
+                    break;
+                case 7: /* mmx */
+                    if (op & 0x77) /* emms */
+                      modrm = 0;
+                    break;
+                case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
+                    modrm = 0;
+                    break;
+                case 8: /* long conditional jump */
+                    is_condjmp = 1;
+                    immed = op_size;
+                    modrm = 0;
+                    break;
+                case 9: /* setcc */
+                    break;
+                case 10:
+                    switch (op & 0x7) {
+                    case 0: /* push fs/gs */
+                    case 1: /* pop fs/gs */
+                    case 2: /* cpuid/rsm */
+                        modrm = 0;
+                        break;
+                    case 4: /* shld/shrd immediate */
+                        immed = 1;
+                        break;
+                    default: /* Normal instructions with a ModR/M byte.  */
+                        break;
+                    }
+                    break;
+                case 11:
+                    switch (op & 0xf) {
+                    case 10: /* bt, bts, btr, btc */
+                        immed = 1;
+                        break;
+                    default:
+                        /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
+                           undefined, and movsx */
+                        break;
+                    }
+                    break;
+                case 12:
+                    if (op & 8) {
+                        /* bswap */
+                        modrm = 0;
+                    } else {
+                        switch (op & 0x7) {
+                        case 2:
+                        case 4:
+                        case 5:
+                        case 6:
+                            immed = 1;
+                            break;
+                        default:
+                            break;
+                        }
+                    }
+                    break;
+                }
+            } else if ((op & 0x07) <= 0x3) {
+                /* General arithmentic ax.  */
+            } else if ((op & 0x07) <= 0x5) {
+                /* General arithmetic ax, immediate.  */
+                if (op & 0x01)
+                    immed = op_size;
+                else
+                    immed = 1;
+                modrm = 0;
+            } else if ((op & 0x23) == 0x22) {
+                /* Segment prefix.  */
+                is_prefix = 1;
+            } else {
+                /* Segment register push/pop or DAA/AAA/DAS/AAS.  */
+                modrm = 0;
+            }
+            break;
+
+#if defined(HOST_X86_64)
+        case 4: /* rex prefix.  */
+            is_prefix = 1;
+            /* The address/operand size is actually 64-bit, but the immediate
+               values in the instruction are still 32-bit.  */
+            op_size = 4;
+            addr_size = 4;
+            if (op & 8)
+                seen_rexw = 1;
+            break;
+#else
+        case 4: /* inc/dec register.  */
+#endif
+        case 5: /* push/pop general register.  */
+            modrm = 0;
+            break;
+
+        case 6:
+            switch (op & 0x0f) {
+            case 0: /* pusha */
+            case 1: /* popa */
+                modrm = 0;
+                break;
+            case 2: /* bound */
+            case 3: /* arpl */
+                break;
+            case 4: /* FS */
+            case 5: /* GS */
+                is_prefix = 1;
+                break;
+            case 6: /* opcode size prefix.  */
+                op_size = 2;
+                is_prefix = 1;
+                break;
+            case 7: /* Address size prefix.  */
+                addr_size = 2;
+                is_prefix = 1;
+                break;
+            case 8: /* push immediate */
+                immed = op_size;
+                modrm = 0;
+                break;
+            case 10: /* push 8-bit immediate */
+                immed = 1;
+                modrm = 0;
+                break;
+            case 9: /* imul immediate */
+                immed = op_size;
+                break;
+            case 11: /* imul 8-bit immediate */
+                immed = 1;
+                break;
+            case 12: /* insb */
+            case 13: /* insw */
+            case 14: /* outsb */
+            case 15: /* outsw */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 7: /* Short conditional jump.  */
+            is_condjmp = 1;
+            immed = 1;
+            modrm = 0;
+            break;
+          
+        case 8:
+            if ((op & 0xf) <= 3) {
+                /* arithmetic immediate.  */
+                if ((op & 3) == 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+            }
+            /* else test, xchg, mov, lea or pop general.  */
+            break;
+
+        case 9:
+            /* Various single-byte opcodes with no modrm byte.  */
+            modrm = 0;
+            if (op == 10) {
+                /* Call */
+                immed = 4;
+            }
+            break;
+
+        case 10:
+            switch ((op & 0xe) >> 1) {
+            case 0: /* mov absoliute immediate.  */
+            case 1:
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = addr_size;
+                break;
+            case 4: /* test immediate.  */
+                if (op & 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+                break;
+            default: /* Various string ops.  */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 11: /* move immediate to register */
+            if (op & 8) {
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = op_size;
+            } else {
+                immed = 1;
+            }
+            modrm = 0;
+            break;
+
+          case 12:
+            switch (op & 0xf) {
+            case 0: /* shift immediate */
+            case 1:
+                immed = 1;
+                break;
+            case 2: /* ret immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 3: /* ret */
+                modrm = 0;
+                is_ret = 1;
+            case 4: /* les */
+            case 5: /* lds */
+                break;
+            case 6: /* mov immediate byte */
+                immed = 1;
+                break;
+            case 7: /* mov immediate */
+                immed = op_size;
+                break;
+            case 8: /* enter */
+                /* TODO: Is this right?  */
+                immed = 3;
+                modrm = 0;
+                break;
+            case 10: /* retf immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 13: /* int */
+                immed = 1;
+                modrm = 0;
+                break;
+            case 11: /* retf */
+            case 15: /* iret */
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            default: /* leave, int3 or into */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 13:
+            if ((op & 0xf) >= 8) {
+                /* Coprocessor escape.  For our purposes this is just a normal
+                   instruction with a ModR/M byte.  */
+            } else if ((op & 0xf) >= 4) {
+                /* AAM, AAD or XLAT */
+                modrm = 0;
+            }
+            /* else shift instruction */
+            break;
+
+        case 14:
+            switch ((op & 0xc) >> 2) {
+            case 0: /* loop or jcxz */
+                is_condjmp = 1;
+                immed = 1;
+                break;
+            case 1: /* in/out immed */
+                immed = 1;
+                break;
+            case 2: /* call or jmp */
+                switch (op & 3) {
+                case 0: /* call */
+                    immed = op_size;
+                    break;
+                case 1: /* long jump */
+                    immed = 4;
+                    is_jmp = 1;
+                    break;
+                case 2: /* far jmp */
+                    bad_opcode(name, op);
+                    break;
+                case 3: /* short jmp */
+                    immed = 1;
+                    is_jmp = 1;
+                    break;
+                }
+                break;
+            case 3: /* in/out register */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 15:
+            switch ((op & 0xe) >> 1) {
+            case 0:
+            case 1:
+                is_prefix = 1;
+                break;
+            case 2:
+            case 4:
+            case 5:
+            case 6:
+                modrm = 0;
+                /* Some privileged insns are used as markers.  */
+                switch (op) {
+                case 0xf4: /* hlt: Exit translation block.  */
+                    is_exit = 1;
+                    break;
+                case 0xfa: /* cli: Jump to label.  */
+                    is_exit = 1;
+                    immed = 4;
+                    break;
+                case 0xfb: /* sti: TB patch jump.  */
+                    /* Mark the insn for patching, but continue sscanning.  */
+                    flags[insn] |= FLAG_EXIT;
+                    immed = 4;
+                    break;
+                }
+                break;
+            case 3: /* unary grp3 */
+                if ((ptr[insn_size] & 0x38) == 0) {
+                    if (op == 0xf7)
+                        immed = op_size;
+                    else
+                        immed = 1; /* test immediate */
+                }
+                break;
+            case 7: /* inc/dec grp4/5 */
+                /* TODO: This includes indirect jumps.  We should fail if we
+                   encounter one of these. */
+                break;
+            }
+            break;
+        }
+    }
+
+    if (modrm) {
+        if (addr_size != 4)
+            error("16-bit addressing mode used in %s", name);
+
+        disp = 0;
+        modrm = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        modrm &= 0xc7;
+        switch ((modrm & 0xc0) >> 6) {
+        case 0:
+            if (modrm == 5)
+              disp = 4;
+            break;
+        case 1:
+            disp = 1;
+            break;
+        case 2:
+            disp = 4;
+            break;
+        }
+        if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
+            /* SIB byte */
+            if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
+                disp = 4;
+                is_pcrel = 1;
+            }
+            insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        }
+        insn_size = eat_bytes(name, flags, insn, insn_size, disp);
+    }
+    insn_size = eat_bytes(name, flags, insn, insn_size, immed);
+    if (is_condjmp || is_jmp) {
+        if (immed == 1) {
+            disp = (int8_t)*(ptr + insn_size - 1);
+        } else {
+            disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
+                   | (((int32_t)*(ptr + insn_size - 2)) << 16)
+                   | (((int32_t)*(ptr + insn_size - 3)) << 8)
+                   | *(ptr + insn_size - 4);
+        }
+        disp += insn_size;
+        /* Jumps to external symbols point to the address of the offset
+           before relocation.  */
+        /* ??? These are probably a tailcall.  We could fix them up by
+           replacing them with jmp to EOB + call, but it's easier to just
+           prevent the compiler generating them.  */
+        if (disp == 1)
+            error("Unconditional jump (sibcall?) in %s", name);
+        disp += insn;
+        if (disp < 0 || disp > len)
+            error("Jump outside instruction in %s", name);
+
+        if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
+            error("Overlapping instructions in %s", name);
+
+        flags[disp] |= (FLAG_INSN | FLAG_TARGET);
+        is_pcrel = 1; 
+    }
+    if (is_pcrel) {
+        /* Mark the following insn as a jump target.  This will stop
+           this instruction being moved.  */
+        flags[insn + insn_size] |= FLAG_TARGET;
+    }
+    if (is_ret)
+      flags[insn] |= FLAG_RET;
+
+    if (is_exit)
+      flags[insn] |= FLAG_EXIT;
+
+    if (!(is_jmp || is_ret || is_exit))
+      flags[insn + insn_size] |= FLAG_INSN;
+}
+
+/* Scan a function body.  Returns the position of the return sequence.
+   Sets *patch_bytes to the number of bytes that need to be copied from that
+   location.  If no patching is required (ie. the return is the last insn)
+   *patch_bytes will be set to -1.  *plen is the number of code bytes to copy.
+ */
+static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
+                         int *patch_bytes, int *exit_addrs)
+{
+    char *flags;
+    int more;
+    int insn;
+    int retpos;
+    int bytes;
+    int num_exits;
+    int len;
+    int last_insn;
+
+    len = *plen;
+    flags = malloc(len + 1);
+    memset(flags, 0, len + 1);
+    flags[0] |= FLAG_INSN;
+    more = 1;
+    while (more) {
+        more = 0;
+        for (insn = 0; insn < len; insn++) {
+            if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
+                trace_i386_insn(name, start_p, flags, insn, len);
+                more = 1;
+            }
+        }
+    }
+
+    /* Strip any unused code at the end of the function.  */
+    while (len > 0 && flags[len - 1] == 0)
+      len--;
+
+    retpos = -1;
+    num_exits = 0;
+    last_insn = 0;
+    for (insn = 0; insn < len; insn++) {
+        if (flags[insn] & FLAG_RET) {
+            /* ??? In theory it should be possible to handle multiple return
+               points.  In practice it's not worth the effort.  */
+            if (retpos != -1)
+                error("Multiple return instructions in %s", name);
+            retpos = insn;
+        }
+        if (flags[insn] & FLAG_EXIT) {
+            if (num_exits == MAX_EXITS)
+                error("Too many block exits in %s", name);
+            exit_addrs[num_exits] = insn;
+            num_exits++;
+        }
+        if (flags[insn] & FLAG_INSN)
+            last_insn = insn;
+    }
+
+    exit_addrs[num_exits] = -1;
+    if (retpos == -1) {
+        if (num_exits == 0) {
+            error ("No return instruction found in %s", name);
+        } else {
+            retpos = len;
+            last_insn = len;
+        }
+    }
+    
+    /* If the return instruction is the last instruction we can just 
+       remove it.  */
+    if (retpos == last_insn)
+        *patch_bytes = -1;
+    else
+        *patch_bytes = 0;
+
+    /* Back up over any nop instructions.  */
+    while (retpos > 0
+           && (flags[retpos] & FLAG_TARGET) == 0
+           && (flags[retpos - 1] & FLAG_INSN) != 0
+           && start_p[retpos - 1] == 0x90) {
+        retpos--;
+    }
+
+    if (*patch_bytes == -1) {
+        *plen = retpos;
+        free (flags);
+        return retpos;
+    }
+    *plen = len;
+
+    /* The ret is in the middle of the function.  Find four more bytes that
+       so the ret can be replaced by a jmp. */
+    /* ??? Use a short jump where possible. */
+    bytes = 4;
+    insn = retpos + 1;
+    /* We can clobber everything up to the next jump target.  */
+    while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
+        insn++;
+        bytes--;
+    }
+    if (bytes > 0) {
+        /* ???: Strip out nop blocks.  */
+        /* We can't do the replacement without clobbering anything important.
+           Copy preceeding instructions(s) to give us some space.  */
+        while (retpos > 0) {
+            /* If this byte is the target of a jmp we can't move it.  */
+            if (flags[retpos] & FLAG_TARGET)
+                break;
+
+            (*patch_bytes)++;
+            bytes--;
+            retpos--;
+
+            /* Break out of the loop if we have enough space and this is either 
+               the first byte of an instruction or a pad byte.  */
+            if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
+                && bytes <= 0) {
+                break;
+            }
+        }
+    }
+
+    if (bytes > 0)
+        error("Unable to replace ret with jmp in %s\n", name);
+
+    free(flags);
+    return retpos;
+}
+
+#endif
+
 #define MAX_ARGS 3
 
 /* generate op code */
@@ -1442,6 +2082,11 @@
     uint8_t args_present[MAX_ARGS];
     const char *sym_name, *p;
     EXE_RELOC *rel;
+#if defined(HOST_I386) || defined(HOST_X86_64)
+    int patch_bytes;
+    int retpos;
+    int exit_addrs[MAX_EXITS];
+#endif
 
     /* Compute exact size excluding prologue and epilogue instructions.
      * Increment start_offset to skip epilogue instructions, then compute
@@ -1452,33 +2097,12 @@
     p_end = p_start + size;
     start_offset = offset;
 #if defined(HOST_I386) || defined(HOST_X86_64)
-#ifdef CONFIG_FORMAT_COFF
-    {
-        uint8_t *p;
-        p = p_end - 1;
-        if (p == p_start)
-            error("empty code for %s", name);
-        while (*p != 0xc3) {
-            p--;
-            if (p <= p_start)
-                error("ret or jmp expected at the end of %s", name);
-        }
-        copy_size = p - p_start;
-    }
-#else
     {
         int len;
         len = p_end - p_start;
-        if (len == 0)
-            error("empty code for %s", name);
-        if (p_end[-1] == 0xc3) {
-            len--;
-        } else {
-            error("ret or jmp expected at the end of %s", name);
-        }
+        retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
         copy_size = len;
     }
-#endif    
 #elif defined(HOST_PPC)
     {
         uint8_t *p;
@@ -1710,6 +2334,13 @@
     }
 
     if (gen_switch == 2) {
+#if defined(HOST_I386) || defined(HOST_X86_64)
+        if (patch_bytes != -1)
+            copy_size += patch_bytes;
+#ifdef DEBUG_OP
+        copy_size += 2;
+#endif
+#endif
         fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
     } else if (gen_switch == 1) {
 
@@ -1915,7 +2546,43 @@
 #error unsupport object format
 #endif
                 }
+               }
+                /* Replace the marker instructions with the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
                 }
+                /* Fix up the return instruction.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) {
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile,
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile,
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes;
+                }
+#ifdef DEBUG_OP
+                fprintf(outfile,
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_X86_64)
             {
@@ -1949,6 +2616,42 @@
                     }
                 }
                 }
+                /* Replace the marker instructions with the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
+                }
+                /* Fix up the return instruction.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) {
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile,
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile,
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes;
+                }
+#ifdef DEBUG_OP
+                fprintf(outfile,
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_PPC)
             {
diff -Naru qemu-neo1973.orig/dyngen-exec.h qemu-neo1973/dyngen-exec.h
--- qemu-neo1973.orig/dyngen-exec.h	2007-06-24 13:56:38.000000000 +0200
+++ qemu-neo1973/dyngen-exec.h	2007-06-24 14:35:52.000000000 +0200
@@ -194,7 +194,12 @@
 #endif
 
 /* force GCC to generate only one epilog at the end of the function */
+#if defined(__i386__) || defined(__x86_64__)
+/* Also add 4 bytes of padding so that we can replace the ret with a jmp.  */
+#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
+#else
 #define FORCE_RET() __asm__ __volatile__("" : : : "memory");
+#endif
 
 #ifndef OPPROTO
 #define OPPROTO
@@ -244,11 +249,18 @@
 #endif
 
 #if defined(__i386__)
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* Dyngen will replace hlt instructions with a ret instruction.  Inserting a
+   ret directly would confuse dyngen.  */
+#define EXIT_TB() asm volatile ("hlt")
+/* Dyngen will replace cli with 0x9e (jmp).
+   We generate the offset manually.  */
+#define GOTO_LABEL_PARAM(n) \
+    asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
 #elif defined(__x86_64__)
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* The same as i386.  */
+#define EXIT_TB() asm volatile ("hlt")
+#define GOTO_LABEL_PARAM(n) \
+    asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
 #elif defined(__powerpc__)
 #define EXIT_TB() asm volatile ("blr")
 #define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n)
diff -Naru qemu-neo1973.orig/exec-all.h qemu-neo1973/exec-all.h
--- qemu-neo1973.orig/exec-all.h	2007-06-24 14:31:58.000000000 +0200
+++ qemu-neo1973/exec-all.h	2007-06-24 14:33:11.000000000 +0200
@@ -329,14 +329,15 @@
 
 #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
 
-/* we patch the jump instruction directly */
+/* we patch the jump instruction directly.  Use sti in place of the actual
+   jmp instruction so that dyngen can patch in the correct result.  */
 #define GOTO_TB(opname, tbparam, n)\
 do {\
     asm volatile (".section .data\n"\
 		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
 		  ".long 1f\n"\
 		  ASM_PREVIOUS_SECTION \
-                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
+                  "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\
 		  "1:\n");\
 } while (0)
 
diff -Naru qemu-neo1973.orig/target-ppc/exec.h qemu-neo1973/target-ppc/exec.h
--- qemu-neo1973.orig/target-ppc/exec.h	2007-06-24 13:56:32.000000000 +0200
+++ qemu-neo1973/target-ppc/exec.h	2007-06-24 14:33:11.000000000 +0200
@@ -66,11 +66,7 @@
 #define FT1 (env->ft1)
 #define FT2 (env->ft2)
 
-#if defined (DEBUG_OP)
-# define RETURN() __asm__ __volatile__("nop" : : : "memory");
-#else
-# define RETURN() __asm__ __volatile__("" : : : "memory");
-#endif
+#define RETURN() FORCE_RET()
 
 static inline target_ulong rotl8 (target_ulong i, int n)
 {