These are Paul Brook's patches to QEMU-0.8.2 to enable the running of single ARM binaries under QEMU's user-emulation mode. Without them, QEMU-0.8.1 immediately dies saying: Error: f0005 qemu: uncaught target signal 6 (Aborted) - exiting while qemu-0.8.2 dies saying: qemu: Unsupported syscall: 983045 cannot set up thread-local storage: unknown error This file is a rediffing of the patches visible at https://nowt.dyndns.org/patch.qemu_nptl on 27 Sept 2006 which "patch" fails to apply automatically. See also http://lists.gnu.org/archive/html/qemu-devel/2006-09/msg00194.html Martin Guy, 27 Sept 2006 --- configure | 25 ++++++ exec-all.h | 165 ------------------------------------------ linux-user/arm/syscall.h | 4 - linux-user/main.c | 94 +++++++++++++++++++++--- linux-user/qemu.h | 3 linux-user/syscall.c | 91 ++++++++++++++++++++++- qemu_spinlock.h | 181 +++++++++++++++++++++++++++++++++++++++++++++++ target-arm/cpu.h | 10 ++ target-arm/op.c | 6 + target-arm/translate.c | 9 ++ 10 files changed, 405 insertions(+), 183 deletions(-) Index: qemu/configure =================================================================== --- qemu.orig/configure 2008-04-09 23:02:37.000000000 +0100 +++ qemu/configure 2008-04-09 23:06:36.000000000 +0100 @@ -109,6 +109,7 @@ build_docs="no" uname_release="" curses="yes" +nptl="yes" # OS specific targetos=`uname -s` @@ -334,6 +335,8 @@ ;; + --disable-nptl) nptl="no" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac done @@ -429,6 +432,7 @@ echo " --disable-linux-user disable all linux usermode emulation targets" echo " --enable-darwin-user enable all darwin usermode emulation targets" echo " --disable-darwin-user disable all darwin usermode emulation targets" +echo " --disable-nptl disable usermode NPTL guest support" echo " --fmod-lib path to FMOD library" echo " --fmod-inc path to FMOD includes" echo " --enable-uname-release=R Return R for uname -r in usermode emulation" @@ -595,6 +599,23 @@ } EOF +# check NPTL support +cat > 
$TMPC <<EOF +#include <sched.h> +void foo() +{ +#ifndef CLONE_SETTLS +#error bork +#endif +} +EOF + +if $cc -c -o $TMPO $TMPC 2> /dev/null ; then + : +else + nptl="no" +fi + ########################################## # SDL probe @@ -778,6 +799,7 @@ echo "Documentation $build_docs" [ ! -z "$uname_release" ] && \ echo "uname -r $uname_release" +echo "NPTL support $nptl" if test $sdl_too_old = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -1115,6 +1137,9 @@ echo "TARGET_ARCH=arm" >> $config_mak echo "#define TARGET_ARCH \"arm\"" >> $config_h echo "#define TARGET_ARM 1" >> $config_h + if test "$nptl" = "yes" ; then + echo "#define USE_NPTL 1" >> $config_h + fi bflt="yes" elif test "$target_cpu" = "sparc" ; then echo "TARGET_ARCH=sparc" >> $config_mak Index: qemu/exec-all.h =================================================================== --- qemu.orig/exec-all.h 2008-04-09 22:39:38.000000000 +0100 +++ qemu/exec-all.h 2008-04-09 23:05:55.000000000 +0100 @@ -297,170 +297,7 @@ extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4]; extern void *io_mem_opaque[IO_MEM_NB_ENTRIES]; -#if defined(__powerpc__) -static inline int testandset (int *p) -{ - int ret; - __asm__ __volatile__ ( - "0: lwarx %0,0,%1\n" - " xor. %0,%3,%0\n" - " bne 1f\n" - " stwcx. 
%2,0,%1\n" - " bne- 0b\n" - "1: " - : "=&r" (ret) - : "r" (p), "r" (1), "r" (0) - : "cr0", "memory"); - return ret; -} -#elif defined(__i386__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__x86_64__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__s390__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" - " jl 0b" - : "=&d" (ret) - : "r" (1), "a" (p), "0" (*p) - : "cc", "memory" ); - return ret; -} -#elif defined(__alpha__) -static inline int testandset (int *p) -{ - int ret; - unsigned long one; - - __asm__ __volatile__ ("0: mov 1,%2\n" - " ldl_l %0,%1\n" - " stl_c %2,%1\n" - " beq %2,1f\n" - ".subsection 2\n" - "1: br 0b\n" - ".previous" - : "=r" (ret), "=m" (*p), "=r" (one) - : "m" (*p)); - return ret; -} -#elif defined(__sparc__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__("ldstub [%1], %0" - : "=r" (ret) - : "r" (p) - : "memory"); - - return (ret ? 
1 : 0); -} -#elif defined(__arm__) -static inline int testandset (int *spinlock) -{ - register unsigned int ret; - __asm__ __volatile__("swp %0, %1, [%2]" - : "=r"(ret) - : "0"(1), "r"(spinlock)); - - return ret; -} -#elif defined(__mc68000) -static inline int testandset (int *p) -{ - char ret; - __asm__ __volatile__("tas %1; sne %0" - : "=r" (ret) - : "m" (p) - : "cc","memory"); - return ret; -} -#elif defined(__ia64) - -#include <ia64intrin.h> - -static inline int testandset (int *p) -{ - return __sync_lock_test_and_set (p, 1); -} -#elif defined(__mips__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ( - " .set push \n" - " .set noat \n" - " .set mips2 \n" - "1: li $1, 1 \n" - " ll %0, %1 \n" - " sc $1, %1 \n" - " beqz $1, 1b \n" - " .set pop " - : "=r" (ret), "+R" (*p) - : - : "memory"); - - return ret; -} -#else -#error unimplemented CPU support -#endif - -typedef int spinlock_t; - -#define SPIN_LOCK_UNLOCKED 0 - -#if defined(CONFIG_USER_ONLY) -static inline void spin_lock(spinlock_t *lock) -{ - while (testandset(lock)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - *lock = 0; -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return !testandset(lock); -} -#else -static inline void spin_lock(spinlock_t *lock) -{ -} - -static inline void spin_unlock(spinlock_t *lock) -{ -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return 1; -} -#endif +#include "qemu_spinlock.h" extern spinlock_t tb_lock; Index: qemu/linux-user/arm/syscall.h =================================================================== --- qemu.orig/linux-user/arm/syscall.h 2007-11-27 12:09:33.000000000 +0000 +++ qemu/linux-user/arm/syscall.h 2008-04-09 23:05:55.000000000 +0100 @@ -28,7 +28,9 @@ #define ARM_SYSCALL_BASE 0x900000 #define ARM_THUMB_SYSCALL 0 -#define ARM_NR_cacheflush (ARM_SYSCALL_BASE + 0xf0000 + 2) +#define ARM_NR_BASE 0xf0000 +#define ARM_NR_cacheflush (ARM_NR_BASE + 2) +#define ARM_NR_set_tls (ARM_NR_BASE + 5) #define 
ARM_NR_semihosting 0x123456 #define ARM_NR_thumb_semihosting 0xAB Index: qemu/linux-user/main.c =================================================================== --- qemu.orig/linux-user/main.c 2008-04-09 23:02:37.000000000 +0100 +++ qemu/linux-user/main.c 2008-04-09 23:05:55.000000000 +0100 @@ -364,6 +364,50 @@ } } +/* Handle a jump to the kernel code page. Returns 0 if a helper was emulated, 1 if the address is not a known helper. */ +static int +do_kernel_trap(CPUARMState *env) +{ + uint32_t addr; + uint32_t *ptr; + uint32_t cpsr; + + switch (env->regs[15]) { + case 0xffff0fc0: /* __kernel_cmpxchg */ + /* XXX: This only works between threads, not between processes. + Use native atomic operations. */ + /* ??? This probably breaks horribly if the access segfaults. */ + cpu_lock(); + ptr = (uint32_t *)env->regs[2]; + cpsr = cpsr_read(env); + if (*ptr == env->regs[0]) { + *ptr = env->regs[1]; + env->regs[0] = 0; + cpsr |= CPSR_C; + } else { + env->regs[0] = -1; + cpsr &= ~CPSR_C; + } + cpsr_write(env, cpsr, CPSR_C); + cpu_unlock(); + break; + case 0xffff0fe0: /* __kernel_get_tls */ + env->regs[0] = env->cp15.c13_tls2; + break; + default: + return 1; + } + /* Jump back to the caller. 
*/ + addr = env->regs[14]; + if (addr & 1) { + env->thumb = 1; + addr &= ~1; + } + env->regs[15] = addr; + + return 0; +} + void cpu_loop(CPUARMState *env) { int trapnr; @@ -474,10 +518,8 @@ } } - if (n == ARM_NR_cacheflush) { - arm_cache_flush(env->regs[0], env->regs[1]); - } else if (n == ARM_NR_semihosting - || n == ARM_NR_thumb_semihosting) { + if (n == ARM_NR_semihosting + || n == ARM_NR_thumb_semihosting) { env->regs[0] = do_arm_semihosting (env); } else if (n == 0 || n >= ARM_SYSCALL_BASE || (env->thumb && n == ARM_THUMB_SYSCALL)) { @@ -488,14 +530,34 @@ n -= ARM_SYSCALL_BASE; env->eabi = 0; } - env->regs[0] = do_syscall(env, - n, - env->regs[0], - env->regs[1], - env->regs[2], - env->regs[3], - env->regs[4], - env->regs[5]); + if ( n > ARM_NR_BASE) { + switch (n) + { + case ARM_NR_cacheflush: + arm_cache_flush(env->regs[0], env->regs[1]); + break; +#ifdef USE_NPTL + case ARM_NR_set_tls: + cpu_set_tls(env, env->regs[0]); + env->regs[0] = 0; + break; +#endif + default: + printf ("Error: Bad syscall: %x\n", n); + goto error; + } + } + else + { + env->regs[0] = do_syscall(env, + n, + env->regs[0], + env->regs[1], + env->regs[2], + env->regs[3], + env->regs[4], + env->regs[5]); + } } else { goto error; } @@ -534,6 +596,10 @@ } } break; + case EXCP_KERNEL_TRAP: + if (do_kernel_trap(env)) + goto error; + break; default: error: fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n", @@ -2402,6 +2468,10 @@ ts->heap_base = info->brk; /* This will be filled in on the first SYS_HEAPINFO call. */ ts->heap_limit = 0; + /* Register the magic kernel code page. The cpu will generate a + special exception when it tries to execute code here. We can't + put real code here because it may be in use by the host kernel. 
*/ + page_set_flags(0xffff0000, 0xffff0fff, 0); #endif if (gdbstub_port) { Index: qemu/linux-user/qemu.h =================================================================== --- qemu.orig/linux-user/qemu.h 2008-01-02 15:48:21.000000000 +0000 +++ qemu/linux-user/qemu.h 2008-04-09 23:05:55.000000000 +0100 @@ -107,6 +107,9 @@ uint32_t heap_base; uint32_t heap_limit; #endif +#ifdef USE_NPTL + uint32_t *child_tidptr; +#endif int used; /* non zero if used */ struct image_info *info; uint8_t stack[0]; Index: qemu/linux-user/syscall.c =================================================================== --- qemu.orig/linux-user/syscall.c 2008-04-09 23:02:38.000000000 +0100 +++ qemu/linux-user/syscall.c 2008-04-09 23:05:55.000000000 +0100 @@ -71,9 +71,18 @@ #include #include "qemu.h" +#include "qemu_spinlock.h" //#define DEBUG +#ifdef USE_NPTL +#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \ + CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) +#else +/* XXX: Hardcode the above values. */ +#define CLONE_NPTL_FLAGS2 0 +#endif + #if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC) \ || defined(TARGET_M68K) || defined(TARGET_SH4) || defined(TARGET_CRIS) /* 16 bit uid wrappers emulation */ @@ -2702,9 +2711,19 @@ thread/process */ #define NEW_STACK_SIZE 8192 +#ifdef USE_NPTL +static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED; +#endif + static int clone_func(void *arg) { CPUState *env = arg; +#ifdef USE_NPTL + /* Wait until the parent has finished initializing the tls state. */ + while (!spin_trylock(&nptl_lock)) + usleep(1); + spin_unlock(&nptl_lock); +#endif cpu_loop(env); /* never exits */ return 0; @@ -2712,13 +2731,22 @@ /* do_fork() Must return host values and target errnos (unlike most do_*() functions). 
*/ -int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp) +int do_fork(CPUState *env, unsigned int flags, unsigned long newsp, + uint32_t *parent_tidptr, void *newtls, + uint32_t *child_tidptr) { int ret; TaskState *ts; uint8_t *new_stack; CPUState *new_env; +#ifdef USE_NPTL + unsigned int nptl_flags; + + if (flags & CLONE_PARENT_SETTID) + *parent_tidptr = gettid(); +#endif + if (flags & CLONE_VM) { ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE); memset(ts, 0, sizeof(TaskState)); @@ -2784,16 +2812,67 @@ #error unsupported target CPU #endif new_env->opaque = ts; +#ifdef USE_NPTL + nptl_flags = flags; + flags &= ~CLONE_NPTL_FLAGS2; + + if (nptl_flags & CLONE_CHILD_CLEARTID) { + ts->child_tidptr = child_tidptr; + } + + if (nptl_flags & CLONE_SETTLS) + cpu_set_tls (new_env, newtls); + + /* Grab the global cpu lock so that the thread setup appears + atomic. */ + if (nptl_flags & CLONE_CHILD_SETTID) + spin_lock(&nptl_lock); + +#else + if (flags & CLONE_NPTL_FLAGS2) + return -EINVAL; +#endif + + if (CLONE_VFORK & flags) + flags ^= CLONE_VM; #ifdef __ia64__ ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); #else ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); #endif +#ifdef USE_NPTL + if (ret != -1) { + if (nptl_flags & CLONE_CHILD_SETTID) + *child_tidptr = ret; + } + + /* Allow the child to continue. */ + if (nptl_flags & CLONE_CHILD_SETTID) + spin_unlock(&nptl_lock); +#endif } else { /* if no CLONE_VM, we consider it is a fork */ - if ((flags & ~CSIGNAL) != 0) + if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) return -EINVAL; ret = fork(); +#ifdef USE_NPTL + /* There is a race condition here. The parent process could + theoretically read the TID in the child process before the child + tid is set. This would require using either ptrace + (not implemented) or having *_tidptr to point at a shared memory + mapping. We can't repeat the spinlock hack used above because + the child process gets its own copy of the lock. 
*/ + if (ret == 0) { + /* Child Process. */ + if (flags & CLONE_CHILD_SETTID) + *child_tidptr = gettid(); + ts = (TaskState *)env->opaque; + if (flags & CLONE_CHILD_CLEARTID) + ts->child_tidptr = child_tidptr; + if (flags & CLONE_SETTLS) + cpu_set_tls (env, newtls); + } +#endif } return ret; } @@ -3118,7 +3197,7 @@ ret = do_brk(arg1); break; case TARGET_NR_fork: - ret = get_errno(do_fork(cpu_env, SIGCHLD, 0)); + ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, NULL, NULL, NULL)); break; #ifdef TARGET_NR_waitpid case TARGET_NR_waitpid: @@ -4481,7 +4560,8 @@ ret = get_errno(fsync(arg1)); break; case TARGET_NR_clone: - ret = get_errno(do_fork(cpu_env, arg1, arg2)); + ret = get_errno(do_fork(cpu_env, arg1, arg2, (uint32_t *)arg3, + (void *)arg4, (uint32_t *)arg5)); break; #ifdef __NR_exit_group /* new thread calls */ @@ -4928,7 +5008,8 @@ #endif #ifdef TARGET_NR_vfork case TARGET_NR_vfork: - ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0)); + ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + NULL, NULL, NULL)); break; #endif #ifdef TARGET_NR_ugetrlimit Index: qemu/qemu_spinlock.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ qemu/qemu_spinlock.h 2008-04-09 23:05:55.000000000 +0100 @@ -0,0 +1,181 @@ +/* + * Atomic operation helper include + * + * Copyright (c) 2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef QEMU_SPINLOCK_H +#define QEMU_SPINLOCK_H + +#ifdef __powerpc__ +static inline int testandset (int *p) +{ + int ret; + __asm__ __volatile__ ( + "0: lwarx %0,0,%1\n" + " xor. %0,%3,%0\n" + " bne 1f\n" + " stwcx. %2,0,%1\n" + " bne- 0b\n" + "1: " + : "=&r" (ret) + : "r" (p), "r" (1), "r" (0) + : "cr0", "memory"); + return ret; +} +#endif + +#ifdef __i386__ +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#endif + +#ifdef __x86_64__ +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#endif + +#ifdef __s390__ +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" + " jl 0b" + : "=&d" (ret) + : "r" (1), "a" (p), "0" (*p) + : "cc", "memory" ); + return ret; +} +#endif + +#ifdef __alpha__ +static inline int testandset (int *p) +{ + int ret; + unsigned long one; + + __asm__ __volatile__ ("0: mov 1,%2\n" + " ldl_l %0,%1\n" + " stl_c %2,%1\n" + " beq %2,1f\n" + ".subsection 2\n" + "1: br 0b\n" + ".previous" + : "=r" (ret), "=m" (*p), "=r" (one) + : "m" (*p)); + return ret; +} +#endif + +#ifdef __sparc__ +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__("ldstub [%1], %0" + : "=r" (ret) + : "r" (p) + : "memory"); + + return (ret ? 
1 : 0); +} +#endif + +#ifdef __arm__ +static inline int testandset (int *spinlock) +{ + register unsigned int ret; + __asm__ __volatile__("swp %0, %1, [%2]" + : "=r"(ret) + : "0"(1), "r"(spinlock)); + + return ret; +} +#endif + +#ifdef __mc68000 +static inline int testandset (int *p) +{ + char ret; + __asm__ __volatile__("tas %1; sne %0" + : "=r" (ret) + : "m" (p) + : "cc","memory"); + return ret; +} +#endif + +#ifdef __ia64 +#include <ia64intrin.h> + +static inline int testandset (int *p) +{ + return __sync_lock_test_and_set (p, 1); +} +#endif + +typedef int spinlock_t; + +#define SPIN_LOCK_UNLOCKED 0 + +#if defined(CONFIG_USER_ONLY) +static inline void spin_lock(spinlock_t *lock) +{ + while (testandset(lock)); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + *lock = 0; +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return !testandset(lock); +} +#else +static inline void spin_lock(spinlock_t *lock) +{ +} + +static inline void spin_unlock(spinlock_t *lock) +{ +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return 1; +} +#endif + +#endif Index: qemu/target-arm/cpu.h =================================================================== --- qemu.orig/target-arm/cpu.h 2007-11-27 12:09:57.000000000 +0000 +++ qemu/target-arm/cpu.h 2008-04-09 23:05:55.000000000 +0100 @@ -38,6 +38,7 @@ #define EXCP_FIQ 6 #define EXCP_BKPT 7 #define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */ +#define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. 
*/ #define ARMV7M_EXCP_RESET 1 #define ARMV7M_EXCP_NMI 2 @@ -222,6 +223,15 @@ void cpu_lock(void); void cpu_unlock(void); +void cpu_lock(void); +void cpu_unlock(void); +#if defined(USE_NPTL) +static inline void cpu_set_tls(CPUARMState *env, void *newtls) +{ + env->cp15.c13_tls2 = (uint32_t)(long)newtls; +} +#endif + #define CPSR_M (0x1f) #define CPSR_T (1 << 5) #define CPSR_F (1 << 6) Index: qemu/target-arm/op.c =================================================================== --- qemu.orig/target-arm/op.c 2008-04-09 22:40:01.000000000 +0100 +++ qemu/target-arm/op.c 2008-04-09 23:05:55.000000000 +0100 @@ -994,6 +994,12 @@ cpu_loop_exit(); } +void OPPROTO op_kernel_trap(void) +{ + env->exception_index = EXCP_KERNEL_TRAP; + cpu_loop_exit(); +} + /* VFP support. We follow the convention used for VFP instrunctions: Single precition routines have a "s" suffix, double precision a "d" suffix. */ Index: qemu/target-arm/translate.c =================================================================== --- qemu.orig/target-arm/translate.c 2008-04-09 22:40:01.000000000 +0100 +++ qemu/target-arm/translate.c 2008-04-09 23:05:55.000000000 +0100 @@ -7496,7 +7496,14 @@ gen_op_exception_exit(); } #endif - +#ifdef CONFIG_USER_ONLY + /* Intercept jump to the magic kernel page, which starts at 0xffff0000 inclusive. */ + if (dc->pc >= 0xffff0000) { + gen_op_kernel_trap(); + dc->is_jmp = DISAS_UPDATE; + break; + } +#endif if (env->nb_breakpoints > 0) { for(j = 0; j < env->nb_breakpoints; j++) { if (env->breakpoints[j] == dc->pc) {