diff --git a/arch/arm/atomic.h b/arch/arm/atomic.h index 8665c87..c73d3b5 100644 --- a/arch/arm/atomic.h +++ b/arch/arm/atomic.h @@ -22,67 +22,193 @@ static inline int a_ctz_64(uint64_t x) return a_ctz_l(y); } -#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \ - || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 - #if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 -#define MEM_BARRIER "dmb ish" -#else -#define MEM_BARRIER "mcr p15,0,r0,c7,c10,5" -#endif -static inline int __k_cas(int t, int s, volatile int *p) +static inline void a_barrier() +{ + __asm__ __volatile__("dmb ish"); +} + +static inline int a_cas(volatile int *p, int t, int s) { - int ret; - __asm__( - " " MEM_BARRIER "\n" + int old; + __asm__ __volatile__( + " dmb ish\n" "1: ldrex %0,%3\n" - " subs %0,%0,%1\n" -#ifdef __thumb__ - " itt eq\n" -#endif - " strexeq %0,%2,%3\n" - " teqeq %0,#1\n" - " beq 1b\n" - " " MEM_BARRIER "\n" - : "=&r"(ret) + " cmp %0,%1\n" + " bne 1f\n" + " strex %0,%2,%3\n" + " cmp %0, #0\n" + " bne 1b\n" + " mov %0, %1\n" + "1: dmb ish\n" + : "=&r"(old) : "r"(t), "r"(s), "Q"(*p) : "memory", "cc" ); - return ret; + return old; +} + +static inline int a_swap(volatile int *x, int v) +{ + int old, tmp; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%3\n" + " strex %1,%2,%3\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : "=&r"(old), "=&r"(tmp) + : "r"(v), "Q"(*x) + : "memory", "cc" ); + return old; +} + +static inline int a_fetch_add(volatile int *x, int v) +{ + int old, tmp; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%3\n" + " add %0,%0,%2\n" + " strex %1,%0,%3\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : "=&r"(old), "=&r"(tmp) + : "r"(v), "Q"(*x) + : "memory", "cc" ); + return old-v; +} + +static inline void a_inc(volatile int *x) +{ + int tmp, tmp2; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%2\n" + " add %0,%0,#1\n" + " strex %1,%0,%2\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : 
"=&r"(tmp), "=&r"(tmp2) + : "Q"(*x) + : "memory", "cc" ); +} + +static inline void a_dec(volatile int *x) +{ + int tmp, tmp2; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%2\n" + " sub %0,%0,#1\n" + " strex %1,%0,%2\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : "=&r"(tmp), "=&r"(tmp2) + : "Q"(*x) + : "memory", "cc" ); } + +static inline void a_and(volatile int *x, int v) +{ + int tmp, tmp2; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%3\n" + " and %0,%0,%2\n" + " strex %1,%0,%3\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : "=&r"(tmp), "=&r"(tmp2) + : "r"(v), "Q"(*x) + : "memory", "cc" ); +} + +static inline void a_or(volatile int *x, int v) +{ + int tmp, tmp2; + __asm__ __volatile__( + " dmb ish\n" + "1: ldrex %0,%3\n" + " orr %0,%0,%2\n" + " strex %1,%0,%3\n" + " cmp %1, #0\n" + " bne 1b\n" + " dmb ish\n" + : "=&r"(tmp), "=&r"(tmp2) + : "r"(v), "Q"(*x) + : "memory", "cc" ); +} + +static inline void a_store(volatile int *p, int x) +{ + __asm__ __volatile__( + " dmb ish\n" + " str %1,%0\n" + " dmb ish\n" + : "=m"(*p) + : "r"(x) + : "memory", "cc" ); +} + +#else + +extern const unsigned char __atomics_base[] __attribute__((__visibility__("hidden"))); +extern uintptr_t __atomics_selector __attribute__((__visibility__("hidden"))); + +static inline void a_barrier() +{ + __asm__ __volatile__( + " mov lr,pc\n" +#ifdef __thumb__ + " bx %0\n" #else -#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0) + " mov pc,%0\n" #endif + : + : "r"((uintptr_t)__atomics_base + __atomics_selector + 0) + : "memory", "lr" ); +} + static inline int a_cas(volatile int *p, int t, int s) { + int (*cas)(int, int, volatile int *) = + ((int (*)(int, int, volatile int *)) + ((uintptr_t)__atomics_base + __atomics_selector + 32)); int old; for (;;) { - if (!__k_cas(t, s, p)) + if (!cas(t, s, p)) return t; if ((old=*p) != t) return old; } } -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - 
static inline int a_swap(volatile int *x, int v) { + int (*cas)(int, int, volatile int *) = + ((int (*)(int, int, volatile int *)) + ((uintptr_t)__atomics_base + __atomics_selector + 32)); int old; do old = *x; - while (__k_cas(old, v, x)); + while (cas(old, v, x)); return old; } static inline int a_fetch_add(volatile int *x, int v) { + int (*cas)(int, int, volatile int *) = + ((int (*)(int, int, volatile int *)) + ((uintptr_t)__atomics_base + __atomics_selector + 32)); int old; do old = *x; - while (__k_cas(old, old+v, x)); + while (cas(old, old+v, x)); return old; } @@ -96,35 +222,45 @@ static inline void a_dec(volatile int *x) a_fetch_add(x, -1); } -static inline void a_store(volatile int *p, int x) +static inline void a_and(volatile int *p, int v) { - while (__k_cas(*p, x, p)); + int (*cas)(int, int, volatile int *) = + ((int (*)(int, int, volatile int *)) + ((uintptr_t)__atomics_base + __atomics_selector + 32)); + int old; + do old = *p; + while (cas(old, old&v, p)); } -#define a_spin a_barrier - -static inline void a_barrier() +static inline void a_or(volatile int *p, int v) { - __k_cas(0, 0, &(int){0}); + int (*cas)(int, int, volatile int *) = + ((int (*)(int, int, volatile int *)) + ((uintptr_t)__atomics_base + __atomics_selector + 32)); + int old; + do old = *p; + while (cas(old, old|v, p)); } -static inline void a_crash() +static inline void a_store(volatile int *p, int x) { - *(volatile char *)0=0; + a_barrier(); + *p = x; + a_barrier(); } -static inline void a_and(volatile int *p, int v) +#endif + +static inline void *a_cas_p(volatile void *p, void *t, void *s) { - int old; - do old = *p; - while (__k_cas(old, old&v, p)); + return (void *)a_cas(p, (int)t, (int)s); } -static inline void a_or(volatile int *p, int v) +#define a_spin a_barrier + +static inline void a_crash() { - int old; - do old = *p; - while (__k_cas(old, old|v, p)); + *(volatile char *)0=0; } static inline void a_or_l(volatile void *p, long v) diff --git a/arch/arm/pthread_arch.h 
b/arch/arm/pthread_arch.h index 6d9dc3a..b292e01 100644 --- a/arch/arm/pthread_arch.h +++ b/arch/arm/pthread_arch.h @@ -10,9 +10,13 @@ static inline __attribute__((const)) pthread_t __pthread_self() #else +extern const unsigned char __atomics_base[] __attribute__((__visibility__("hidden"))); +extern uintptr_t __atomics_selector __attribute__((__visibility__("hidden"))); + typedef char *(*__ptr_func_t)(void) __attribute__((const)); -#define __pthread_self() \ - ((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread))) +#define __pthread_self() ((pthread_t)( \ + ((__ptr_func_t)((unsigned long)__atomics_base+__atomics_selector+64))() \ + +8-sizeof(struct pthread))) #endif diff --git a/arch/arm/src/__set_thread_area.c b/arch/arm/src/__set_thread_area.c index e69de29..ba6a30b 100644 --- a/arch/arm/src/__set_thread_area.c +++ b/arch/arm/src/__set_thread_area.c @@ -0,0 +1,37 @@ +#include <stdint.h> +#include <elf.h> +#include "pthread_impl.h" +#include "libc.h" + +#define HWCAP_TLS (1 << 15) + +extern const unsigned char __attribute__((__visibility__("hidden"))) + __atomics_base[], __atomics_oldkuser_base[], + __atomics_v6_base[], __atomics_v7_base[]; + +extern uintptr_t __atomics_selector __attribute__((__visibility__("hidden"))); + +int __set_thread_area(void *p) +{ +#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 + uintptr_t sel; + if (__hwcap & HWCAP_TLS) { + size_t *aux; + sel = (uintptr_t)__atomics_v7_base; + for (aux=libc.auxv; *aux; aux+=2) { + if (*aux != AT_PLATFORM) continue; + const char *s = (void *)aux[1]; + if (s[0]=='v' && s[1]=='6' && s[2]-'0'>=10u) + sel = (uintptr_t)__atomics_v6_base; + break; + } + } else { + int ver = *(int *)0xffff0ffc; + sel = (uintptr_t)0xffff0fa0; + if (ver < 2) a_crash(); + if (ver < 3) sel = (uintptr_t)__atomics_oldkuser_base; + } + __atomics_selector = sel - (uintptr_t)__atomics_base; +#endif + return __syscall(0xf0005, p); +} diff --git a/arch/arm/src/arm/atomics.s b/arch/arm/src/arm/atomics.s index e69de29..b55abdf 100644
--- a/arch/arm/src/arm/atomics.s +++ b/arch/arm/src/arm/atomics.s @@ -0,0 +1,114 @@ +.text + +.global __atomics_base +.global __atomics_dummy_base +.global __atomics_oldkuser_base +.global __atomics_v6_base +.global __atomics_v7_base + +.hidden __atomics_base +.hidden __atomics_dummy_base +.hidden __atomics_oldkuser_base +.hidden __atomics_v6_base +.hidden __atomics_v7_base + +.p2align 5 +__atomics_base: +__atomics_dummy_base: +dummy_barrier: + tst lr,#1 + moveq pc,lr + bx lr + +.p2align 5 +dummy_cas: + ldr r3,[r2] + subs r3,r3,r0 + streq r1,[r2] + rsbs r0,r3,#0 + tst lr,#1 + moveq pc,lr + bx lr + +.p2align 5 +dummy_gettp: + mov r0,#0 + tst lr,#1 + moveq pc,lr + bx lr + +.p2align 5 +__atomics_oldkuser_base: +oldkuser_barrier: + tst lr,#1 + moveq pc,lr + bx lr + +.p2align 5 +oldkuser_cas: + adr ip,1f + ldr ip,[ip] + mov pc,ip +1: .word 0xffff0fc0 + +.p2align 5 +oldkuser_gettp: + adr r0,1f + ldr r0,[r0] + mov pc,r0 +1: .word 0xffff0fe0 + +.p2align 5 +__atomics_v6_base: +v6_barrier: + mcr p15,0,r0,c7,c10,5 + bx lr + +.p2align 5 +v6_cas: + mov r3,r0 + mcr p15,0,r0,c7,c10,5 +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r0,r3 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + b v6_barrier + +.p2align 5 +v6_gettp: + mrc p15,0,r0,c13,c0,3 + bx lr + + + +.p2align 5 +__atomics_v7_base: +v7_barrier: + .word 0xf57ff05b /* dmb ish */ + bx lr + +.p2align 5 +v7_cas: + mov r3,r0 + .word 0xf57ff05b /* dmb ish */ +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r0,r3 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + b v7_barrier + +.p2align 5 +v7_gettp: + mrc p15,0,r0,c13,c0,3 + bx lr + + + + +.data +.global __atomics_selector +.hidden __atomics_selector +__atomics_selector: + .word 0 diff --git a/src/thread/arm/__set_thread_area.s b/src/thread/arm/__set_thread_area.s index 63d8884..4a4cd0d 100644 --- a/src/thread/arm/__set_thread_area.s +++ b/src/thread/arm/__set_thread_area.s @@ -1,12 +1 @@ -.text -.global __set_thread_area -.type 
__set_thread_area,%function -__set_thread_area: - mov r1,r7 - mov r7,#0x0f0000 - add r7,r7,#5 - svc 0 - mov r7,r1 - tst lr,#1 - moveq pc,lr - bx lr +/* Replaced by C code in arch/arm/src */