diff --git a/arch/arm/atomic.h b/arch/arm/atomic.h
index 8665c87..8ae35bb 100644
--- a/arch/arm/atomic.h
+++ b/arch/arm/atomic.h
@@ -22,37 +22,150 @@ static inline int a_ctz_64(uint64_t x)
 	return a_ctz_l(y);
 }
 
-#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
-
 #if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
-#define MEM_BARRIER "dmb ish"
-#else
-#define MEM_BARRIER "mcr p15,0,r0,c7,c10,5"
-#endif
 
-static inline int __k_cas(int t, int s, volatile int *p)
+static inline void a_barrier()
 {
-	int ret;
-	__asm__(
-		"	" MEM_BARRIER "\n"
+	__asm__ __volatile__("dmb ish");
+}
+
+static inline int a_cas(volatile int *p, int t, int s)
+{
+	int old;
+	__asm__ __volatile__(
+		"	dmb ish\n"
 		"1:	ldrex %0,%3\n"
-		"	subs %0,%0,%1\n"
-#ifdef __thumb__
-		"	itt eq\n"
-#endif
-		"	strexeq %0,%2,%3\n"
-		"	teqeq %0,#1\n"
-		"	beq 1b\n"
-		"	" MEM_BARRIER "\n"
-		: "=&r"(ret)
+		"	cmp %0,%1\n"
+		"	bne 1f\n"
+		"	strex %0,%2,%3\n"
+		"	cmp %0, #0\n"
+		"	bne 1b\n"
+		"	mov %0, %1\n"
+		"1:	dmb ish\n"
+		: "=&r"(old)
 		: "r"(t), "r"(s), "Q"(*p)
 		: "memory", "cc" );
-	return ret;
+	return old;
+}
+
+static inline int a_swap(volatile int *x, int v)
+{
+	int old, tmp;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%3\n"
+		"	strex %1,%2,%3\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(v), "Q"(*x)
+		: "memory", "cc" );
+	return old;
+}
+
+static inline int a_fetch_add(volatile int *x, int v)
+{
+	int old, tmp;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%3\n"
+		"	add %0,%0,%2\n"
+		"	strex %1,%0,%3\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(v), "Q"(*x)
+		: "memory", "cc" );
+	return old-v;
+}
+
+static inline void a_inc(volatile int *x)
+{
+	int tmp, tmp2;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%2\n"
+		"	add %0,%0,#1\n"
+		"	strex %1,%0,%2\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(tmp), "=&r"(tmp2)
+		: "Q"(*x)
+		: "memory", "cc" );
+}
+
+static inline void a_dec(volatile int *x)
+{
+	int tmp, tmp2;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%2\n"
+		"	sub %0,%0,#1\n"
+		"	strex %1,%0,%2\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(tmp), "=&r"(tmp2)
+		: "Q"(*x)
+		: "memory", "cc" );
+}
+
+static inline void a_and(volatile int *x, int v)
+{
+	int tmp, tmp2;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%3\n"
+		"	and %0,%0,%2\n"
+		"	strex %1,%0,%3\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(tmp), "=&r"(tmp2)
+		: "r"(v), "Q"(*x)
+		: "memory", "cc" );
+}
+
+static inline void a_or(volatile int *x, int v)
+{
+	int tmp, tmp2;
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"1:	ldrex %0,%3\n"
+		"	orr %0,%0,%2\n"
+		"	strex %1,%0,%3\n"
+		"	cmp %1, #0\n"
+		"	bne 1b\n"
+		"	dmb ish\n"
+		: "=&r"(tmp), "=&r"(tmp2)
+		: "r"(v), "Q"(*x)
+		: "memory", "cc" );
+}
+
+static inline void a_store(volatile int *p, int x)
+{
+	__asm__ __volatile__(
+		"	dmb ish\n"
+		"	str %1,%0\n"
+		"	dmb ish\n"
+		: "=m"(*p)
+		: "r"(x)
+		: "memory", "cc" );
 }
+
 #else
-#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)
-#endif
+
+int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
+#define __k_cas __a_cas
+
+static inline void a_barrier()
+{
+	__asm__ __volatile__("bl __a_barrier"
+		: : : "memory", "cc", "ip", "lr" );
+}
 
 static inline int a_cas(volatile int *p, int t, int s)
 {
@@ -65,11 +178,6 @@ static inline int a_cas(volatile int *p, int t, int s)
 	}
 }
 
-static inline void *a_cas_p(volatile void *p, void *t, void *s)
-{
-	return (void *)a_cas(p, (int)t, (int)s);
-}
-
 static inline int a_swap(volatile int *x, int v)
 {
 	int old;
@@ -98,19 +206,9 @@ static inline void a_dec(volatile int *x)
 
 static inline void a_store(volatile int *p, int x)
 {
-	while (__k_cas(*p, x, p));
-}
-
-#define a_spin a_barrier
-
-static inline void a_barrier()
-{
-	__k_cas(0, 0, &(int){0});
-}
-
-static inline void a_crash()
-{
-	*(volatile char *)0=0;
+	a_barrier();
+	*p = x;
+	a_barrier();
 }
 
 static inline void a_and(volatile int *p, int v)
@@ -127,6 +225,20 @@ static inline void a_or(volatile int *p, int v)
 	while (__k_cas(old, old|v, p));
 }
 
+#endif
+
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	return (void *)a_cas(p, (int)t, (int)s);
+}
+
+#define a_spin a_barrier
+
+static inline void a_crash()
+{
+	*(volatile char *)0=0;
+}
+
 static inline void a_or_l(volatile void *p, long v)
 {
 	a_or(p, v);
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index 6d9dc3a..e72f74d 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -10,9 +10,17 @@ static inline __attribute__((const)) pthread_t __pthread_self()
 
 #else
 
-typedef char *(*__ptr_func_t)(void) __attribute__((const));
-#define __pthread_self() \
-	((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread)))
+static inline __attribute__((const)) pthread_t __pthread_self()
+{
+#ifdef __clang__
+	char *p;
+	__asm__( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "ip", "lr" );
+#else
+	register char *p __asm__("r0");
+	__asm__( "bl __a_gettp" : "=r"(p) : : "cc", "ip", "lr" );
+#endif
+	return (void *)(p+8-sizeof(struct pthread));
+}
 
 #endif
 
diff --git a/arch/arm/src/__set_thread_area.c b/arch/arm/src/__set_thread_area.c
index e69de29..680510e 100644
--- a/arch/arm/src/__set_thread_area.c
+++ b/arch/arm/src/__set_thread_area.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <elf.h>
+#include "pthread_impl.h"
+#include "libc.h"
+
+#define HWCAP_TLS (1 << 15)
+
+extern const unsigned char __attribute__((__visibility__("hidden")))
+	__a_barrier_dummy[], __a_barrier_oldkuser[],
+	__a_barrier_v6[], __a_barrier_v7[],
+	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
+	__a_gettp_dummy[], __a_gettp_native[];
+
+#define __a_barrier_kuser 0xffff0fa0
+#define __a_cas_kuser 0xffff0fc0
+#define __a_gettp_kuser 0xffff0fe0
+
+extern uintptr_t __attribute__((__visibility__("hidden")))
+	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
+
+#define SET(op,ver) (__a_##op##_ptr = \
+	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
+
+int __set_thread_area(void *p)
+{
+#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
+	if (__hwcap & HWCAP_TLS) {
+		size_t *aux;
+		SET(gettp, native);
+		SET(cas, v7);
+		SET(barrier, v7);
+		for (aux=libc.auxv; *aux; aux+=2) {
+			if (*aux != AT_PLATFORM) continue;
+			const char *s = (void *)aux[1];
+			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
+			SET(cas, v6);
+			SET(barrier, v6);
+			break;
+		}
+	} else {
+		int ver = *(int *)0xffff0ffc;
+		SET(gettp, kuser);
+		SET(cas, kuser);
+		SET(barrier, kuser);
+		if (ver < 2) a_crash();
+		if (ver < 3) SET(barrier, oldkuser);
+	}
+#endif
+	return __syscall(0xf0005, p);
+}
diff --git a/arch/arm/src/arm/atomics.s b/arch/arm/src/arm/atomics.s
index e69de29..93e5928 100644
--- a/arch/arm/src/arm/atomics.s
+++ b/arch/arm/src/arm/atomics.s
@@ -0,0 +1,119 @@
+.text
+
+.global __a_barrier
+.hidden __a_barrier
+.type __a_barrier,%function
+__a_barrier:
+	ldr ip,1f
+	ldr ip,[pc,ip]
+	add pc,pc,ip
+1:	.word __a_barrier_ptr-1b
+.global __a_barrier_dummy
__a_barrier_dummy +.hidden __a_barrier_dummy +__a_barrier_dummy: + tst lr,#1 + moveq pc,lr + bx lr +.global __a_barrier_oldkuser +.hidden __a_barrier_oldkuser +__a_barrier_oldkuser: + push {r0,r1,r2,r3,ip,lr} + mov r1,r0 + mov r2,sp + ldr ip,=0xffff0fc0 + mov lr,pc + mov pc,ip + pop {r0,r1,r2,r3,ip,lr} + tst lr,#1 + moveq pc,lr + bx lr +.global __a_barrier_v6 +.hidden __a_barrier_v6 +__a_barrier_v6: + mcr p15,0,r0,c7,c10,5 + bx lr +.global __a_barrier_v7 +.hidden __a_barrier_v7 +__a_barrier_v7: + .word 0xf57ff05b /* dmb ish */ + bx lr + +.global __a_cas +.hidden __a_cas +.type __a_cas,%function +__a_cas: + ldr ip,1f + ldr ip,[pc,ip] + add pc,pc,ip +1: .word __a_cas_ptr-1b +.global __a_cas_dummy +.hidden __a_cas_dummy +__a_cas_dummy: + mov r3,r0 + ldr r0,[r2] + subs r0,r3,r0 + streq r1,[r2] + tst lr,#1 + moveq pc,lr + bx lr +.global __a_cas_v6 +.hidden __a_cas_v6 +__a_cas_v6: + mov r3,r0 + mcr p15,0,r0,c7,c10,5 +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r3,r0 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + mcr p15,0,r0,c7,c10,5 + bx lr +.global __a_cas_v7 +.hidden __a_cas_v7 +__a_cas_v7: + mov r3,r0 + .word 0xf57ff05b /* dmb ish */ +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r3,r0 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + .word 0xf57ff05b /* dmb ish */ + bx lr + +.global __a_gettp +.hidden __a_gettp +.type __a_gettp,%function +__a_gettp: + ldr ip,1f + ldr ip,[pc,ip] + add pc,pc,ip +1: .word __a_gettp_ptr-1b +.global __a_gettp_dummy +.hidden __a_gettp_dummy +__a_gettp_dummy: + .word 0xe7fddef1 + tst lr,#1 + moveq pc,lr + bx lr +.global __a_gettp_native +.hidden __a_gettp_native +__a_gettp_native: + mrc p15,0,r0,c13,c0,3 + bx lr + +.data +.global __a_barrier_ptr +.hidden __a_barrier_ptr +__a_barrier_ptr: + .word 0 + +.global __a_cas_ptr +.hidden __a_cas_ptr +__a_cas_ptr: + .word 0 + +.global __a_gettp_ptr +.hidden __a_gettp_ptr +__a_gettp_ptr: + .word 0 diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c index c90fe99..1de430c 100644 --- a/src/ldso/dynlink.c +++ b/src/ldso/dynlink.c @@ -1126,6 +1126,7 @@ void *__dynlink(int argc, char **argv) libc.secure = 1; } libc.page_size = aux[AT_PAGESZ]; + libc.auxv = auxv; /* If the dynamic linker was invoked as a program itself, AT_BASE * will not be set. In that case, we assume the base address is diff --git a/src/thread/arm/__set_thread_area.s b/src/thread/arm/__set_thread_area.s index 63d8884..4a4cd0d 100644 --- a/src/thread/arm/__set_thread_area.s +++ b/src/thread/arm/__set_thread_area.s @@ -1,12 +1 @@ -.text -.global __set_thread_area -.type __set_thread_area,%function -__set_thread_area: - mov r1,r7 - mov r7,#0x0f0000 - add r7,r7,#5 - svc 0 - mov r7,r1 - tst lr,#1 - moveq pc,lr - bx lr +/* Replaced by C code in arch/arm/src */