arm: add Thumb-1 and M-profile (ARMv6-M / ARMv7-M) support

Summary of the change set:
  * Replace instructions that have no Thumb-1 encoding (mov/and with
    immediates into high registers, stmfd/ldmfd on sp, direct loads and
    stores of lr/ip, conditional execution) with low-register
    equivalents in the crt, setjmp/longjmp, sigsetjmp, restore,
    tlsdesc, vfork, clone and syscall_cp assembly.
  * Exclude M-profile from the cp15 thread-pointer fast path and from
    the FPA / iWMMXt jmp_buf handling.
  * Use the inline "dmb ish" barrier on M-profile as well.
  * Add M-profile barrier/cas/gettp helpers to atomics.s and select
    them in __set_thread_area().
  * Build the assembly memcpy only for ARM / Thumb-2 and use the
    generic C memcpy for Thumb-1; replace __unmapself.s with a C file
    that falls back to the generic implementation on M-profile.

Review fixes folded into this revision:
  * crt_arch.h: align the stack through a3 so that a1 still holds the
    unmodified entry sp when START_c is called (as before the change).
  * setjmp.S: the Thumb-1 path used r4-r6 as scratch and returned with
    them clobbered; it now stages high registers through r1-r3 only.
  * sigsetjmp.s: r0 was left at buf+200 when calling setjmp; it is now
    rebased to the start of the buffer before "mov r4,r0" and offset
    again only for the reload of lr/r4.
  * atomics.s: __a_cas_m and __a_cas_intmask_m branched back to the
    load on a compare mismatch and therefore never returned failure
    (the latter with interrupts masked); they now branch forward to
    the exit and return the non-zero difference.
  * atomics.s: .arm/.thumb (and the .arch that must precede them) are
    emitted before each function label so the label is defined in the
    instruction-set state of the code that follows it.
  * __set_thread_area.c: add the missing (uintptr_t) cast when storing
    __a_gettp_syscall into __a_gettp_ptr.
  * New C sources are created with mode 100644 instead of 100755.

Notes for the next reviewer:
  * NOTE(review): __a_gettp_syscall is a C function, but tlsdesc.S
    keeps live values in r1/r2 across the call through __a_gettp_ptr.
    Confirm it cannot clobber them, or move it to atomics.s.
  * NOTE(review): __a_cas_dummy is forced to ARM state; confirm it is
    never reached on an M-profile build before __set_thread_area runs.
  * The line structure and leading whitespace of this patch were
    reconstructed from a whitespace-collapsed copy. Context lines may
    differ from the tree in spacing only; apply with
    "git apply --ignore-whitespace" or "patch -l" if needed.
  * "index" lines are kept from the original submission; post-image
    ids of files touched by the review fixes no longer match.

diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
index 9e3937cc..54b743bb 100644
--- a/arch/arm/atomic_arch.h
+++ b/arch/arm/atomic_arch.h
@@ -27,16 +27,6 @@ static inline int a_sc(volatile int *p, int v)
 	return !r;
 }
 
-#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
-
-#define a_barrier a_barrier
-static inline void a_barrier()
-{
-	__asm__ __volatile__ ("dmb ish" : : : "memory");
-}
-
-#endif
-
 #define a_pre_llsc a_barrier
 #define a_post_llsc a_barrier
 
@@ -62,13 +52,22 @@ static inline int a_cas(volatile int *p, int t, int s)
 
 #endif
 
-#ifndef a_barrier
 #define a_barrier a_barrier
+#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 || __ARM_ARCH_PROFILE == 'M'
+
+static inline void a_barrier()
+{
+	__asm__ __volatile__ ("dmb ish" : : : "memory");
+}
+
+#else
+
 static inline void a_barrier()
 {
 	register uintptr_t ip __asm__("ip") = __a_barrier_ptr;
 	__asm__ __volatile__( BLX " ip" : "+r"(ip) : : "memory", "cc", "lr" );
 }
+
 #endif
 
 #define a_crash a_crash
diff --git a/arch/arm/crt_arch.h b/arch/arm/crt_arch.h
index 99508b1d..66080422 100644
--- a/arch/arm/crt_arch.h
+++ b/arch/arm/crt_arch.h
@@ -3,13 +3,15 @@ __asm__(
 ".global " START " \n"
 ".type " START ",%function \n"
 START ": \n"
-"	mov fp, #0 \n"
-"	mov lr, #0 \n"
+"	movs a3, #0 \n"
+"	mov fp, a3 \n"
+"	mov lr, a3 \n"
 "	ldr a2, 1f \n"
 "	add a2, pc, a2 \n"
 "	mov a1, sp \n"
-"2:	and ip, a1, #-16 \n"
-"	mov sp, ip \n"
+"2:	subs a3, #16 \n"
+"	ands a3, a1 \n"
+"	mov sp, a3 \n"
 "	bl " START "_c \n"
 ".weak _DYNAMIC \n"
 ".hidden _DYNAMIC \n"
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index e689ea21..9155b9a4 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -1,5 +1,5 @@
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || (__ARM_ARCH >= 7 && __ARM_ARCH_PROFILE != 'M')
 
 static inline pthread_t __pthread_self()
 {
diff --git a/crt/arm/crtn.s b/crt/arm/crtn.s
index dc020f92..547e64b7 100644
--- a/crt/arm/crtn.s
+++ b/crt/arm/crtn.s
@@ -1,9 +1,9 @@
 .syntax unified
 
 .section .init
-	pop {r0,lr}
-	bx lr
+	pop {r0,r1}
+	bx r1
 
 .section .fini
-	pop {r0,lr}
-	bx lr
+	pop {r0,r1}
+	bx r1
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 3ae133c9..33216200 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -12,13 +12,13 @@ __tlsdesc_static:
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,%function
 __tlsdesc_dynamic:
-	push {r2,r3,ip,lr}
+	push {r2,r3,r4,lr}
 	ldr r1,[r0]
 	ldr r2,[r1,#4] // r2 = offset
 	ldr r1,[r1] // r1 = modid
 
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || (__ARM_ARCH >= 7 && __ARM_ARCH_PROFILE != 'M')
 	mrc p15,0,r0,c13,c0,3
 #else
 	ldr r0,1f
@@ -36,19 +36,28 @@ __tlsdesc_dynamic:
 	bx r0
 #endif
 #endif
+#if defined(__thumb2__) || !defined(__thumb__)
 	ldr r3,[r0,#-4] // r3 = dtv
-	ldr ip,[r3,r1,LSL #2]
-	sub r0,ip,r0
+	ldr r4,[r3,r1,LSL #2]
+	sub r0,r4,r0
+#else
+	mov r4,r0
+	subs r4,#4
+	ldr r3,[r4]
+	lsls r4,r1,#2
+	ldr r4,[r3,r4]
+	subs r0,r4,r0
+#endif
 	add r0,r0,r2 // r0 = r3[r1]-r0+r2
 #if __ARM_ARCH >= 5
-	pop {r2,r3,ip,pc}
+	pop {r2,r3,r4,pc}
 #else
-	pop {r2,r3,ip,lr}
+	pop {r2,r3,r4,lr}
 	bx lr
 #endif
 
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || (__ARM_ARCH >= 7 && __ARM_ARCH_PROFILE != 'M')
 #else
 	.align 2
 1:	.word __a_gettp_ptr - 2b
diff --git a/src/process/arm/vfork.s b/src/process/arm/vfork.s
index d7ec41b3..b6f0260e 100644
--- a/src/process/arm/vfork.s
+++ b/src/process/arm/vfork.s
@@ -3,7 +3,7 @@
 .type vfork,%function
 vfork:
 	mov ip, r7
-	mov r7, 190
+	movs r7, 190
 	svc 0
 	mov r7, ip
 	.hidden __syscall_ret
diff --git a/src/setjmp/arm/longjmp.S b/src/setjmp/arm/longjmp.S
index 8df0b819..a2641b92 100644
--- a/src/setjmp/arm/longjmp.S
+++ b/src/setjmp/arm/longjmp.S
@@ -7,16 +7,32 @@ _longjmp:
 longjmp:
 	mov ip,r0
 	movs r0,r1
+#if defined(__thumb2__) || !defined(__thumb__)
 	moveq r0,#1
 	ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
 	ldmia ip!, {r2,lr}
 	mov sp,r2
-
+#else
+	bne 4f
+	movs r0,#1
+4:	mov r1,ip
+	adds r1,#16
+	ldmia r1!, {r2-r7}
+	mov lr,r7
+	mov sp,r6
+	mov r11,r5
+	mov r10,r4
+	mov r9,r3
+	mov r8,r2
+	mov ip,r1
+	subs r1,#40
+	ldmia r1!, {r4-r7}
+#endif
 	adr r1,1f
 	ldr r2,1f
 	ldr r1,[r1,r2]
 
-#if __ARM_ARCH < 8
+#if __ARM_ARCH_PROFILE != 'M' && __ARM_ARCH < 8
 	tst r1,#0x260
 	beq 3f
 	// HWCAP_ARM_FPA
@@ -24,14 +40,15 @@ longjmp:
 	beq 2f
 	ldc p2, cr4, [ip], #48
 #endif
-2:	tst r1,#0x40
+2:	movs r2,#0x40
+	tst r1,r2
 	beq 2f
 	.fpu vfp
 	vldmia ip!, {d8-d15}
 	.fpu softvfp
 	.eabi_attribute 10, 0
 	.eabi_attribute 27, 0
-#if __ARM_ARCH < 8
+#if __ARM_ARCH_PROFILE != 'M' && __ARM_ARCH < 8
 	// HWCAP_ARM_IWMMXT
 2:	tst r1,#0x200
 	beq 3f
diff --git a/src/setjmp/arm/setjmp.S b/src/setjmp/arm/setjmp.S
index 45731d22..7ca51886 100644
--- a/src/setjmp/arm/setjmp.S
+++ b/src/setjmp/arm/setjmp.S
@@ -8,17 +8,29 @@
 __setjmp:
 _setjmp:
 setjmp:
+#if defined(__thumb2__) || !defined(__thumb__)
 	mov ip,r0
 	stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
 	mov r2,sp
 	stmia ip!,{r2,lr}
-	mov r0,#0
-
+#else
+	stmia r0!,{r4-r7}
+	mov r1,r8
+	mov r2,r9
+	mov r3,r10
+	stmia r0!,{r1-r3}
+	mov r1,r11
+	mov r2,sp
+	mov r3,lr
+	stmia r0!,{r1-r3}
+	mov ip,r0
+#endif
+	movs r0,#0
 	adr r1,1f
 	ldr r2,1f
 	ldr r1,[r1,r2]
 
-#if __ARM_ARCH < 8
+#if __ARM_ARCH_PROFILE != 'M' && __ARM_ARCH < 8
 	tst r1,#0x260
 	beq 3f
 	// HWCAP_ARM_FPA
@@ -26,14 +38,15 @@ setjmp:
 	beq 2f
 	stc p2, cr4, [ip], #48
 #endif
-2:	tst r1,#0x40
+2:	movs r2,#0x40
+	tst r1,r2
 	beq 2f
 	.fpu vfp
 	vstmia ip!, {d8-d15}
 	.fpu softvfp
 	.eabi_attribute 10, 0
 	.eabi_attribute 27, 0
-#if __ARM_ARCH < 8
+#if __ARM_ARCH_PROFILE != 'M' && __ARM_ARCH < 8
 	// HWCAP_ARM_IWMMXT
 2:	tst r1,#0x200
 	beq 3f
diff --git a/src/signal/arm/restore.s b/src/signal/arm/restore.s
index fb086d9b..2b7621b1 100644
--- a/src/signal/arm/restore.s
+++ b/src/signal/arm/restore.s
@@ -4,12 +4,12 @@
 .hidden __restore
 .type __restore,%function
 __restore:
-	mov r7,#119
+	movs r7,#119
 	swi 0x0
 
 .global __restore_rt
 .hidden __restore_rt
 .type __restore_rt,%function
 __restore_rt:
-	mov r7,#173
+	movs r7,#173
 	swi 0x0
diff --git a/src/signal/arm/sigsetjmp.s b/src/signal/arm/sigsetjmp.s
index 69ebbf49..8ef51de3 100644
--- a/src/signal/arm/sigsetjmp.s
+++ b/src/signal/arm/sigsetjmp.s
@@ -9,16 +9,22 @@ __sigsetjmp:
 	bne 1f
 	b setjmp
 
-1:	str lr,[r0,#256]
-	str r4,[r0,#260+8]
+1:	mov r2,lr
+	adds r0,#200
+	str r2,[r0,#56]
+	str r4,[r0,#60+8]
+	subs r0,#200
 	mov r4,r0
 
 	bl setjmp
 
 	mov r1,r0
 	mov r0,r4
-	ldr lr,[r0,#256]
-	ldr r4,[r0,#260+8]
+	adds r0,#200
+	ldr r2,[r0,#56]
+	mov lr,r2
+	ldr r4,[r0,#60+8]
+	subs r0,#200
 
 .hidden __sigsetjmp_tail
 	b __sigsetjmp_tail
diff --git a/src/string/arm/memcpy.S b/src/string/arm/memcpy.S
index 869e3448..2eb28eec 100644
--- a/src/string/arm/memcpy.S
+++ b/src/string/arm/memcpy.S
@@ -43,6 +43,8 @@
  * building as thumb 2 and big-endian.
  */
 
+#if defined(__thumb2__) || !defined(__thumb__)
+
 .syntax unified
 
 .global memcpy
@@ -477,3 +479,4 @@ copy_last_3_and_return:
 	ldmfd sp!, {r0, r4, lr}
 	bx lr
 
+#endif /* defined(__thumb2__) || !defined(__thumb__) */
diff --git a/src/string/arm/memcpy_thumb1.c b/src/string/arm/memcpy_thumb1.c
new file mode 100644
index 00000000..f8ca3f13
--- /dev/null
+++ b/src/string/arm/memcpy_thumb1.c
@@ -0,0 +1,5 @@
+#if defined(__thumb__) && !defined(__thumb2__)
+
+#include "../memcpy.c"
+
+#endif
diff --git a/src/thread/arm/__set_thread_area.c b/src/thread/arm/__set_thread_area.c
index 09de65aa..a4fb2cbb 100644
--- a/src/thread/arm/__set_thread_area.c
+++ b/src/thread/arm/__set_thread_area.c
@@ -6,27 +6,54 @@
 #define HWCAP_TLS (1 << 15)
 
 extern hidden const unsigned char
-	__a_barrier_oldkuser[], __a_barrier_v6[], __a_barrier_v7[],
-	__a_cas_v6[], __a_cas_v7[],
-	__a_gettp_cp15[];
+	__a_barrier_oldkuser[], __a_barrier_v6[], __a_barrier_v7[], __a_barrier_m[],
+	__a_cas_v6[], __a_cas_v7[], __a_cas_m[], __a_cas_intmask_m[],
+	__a_gettp_cp15[], __a_gettp_cp15_m[];
 
 #define __a_barrier_kuser 0xffff0fa0
 #define __a_barrier_oldkuser (uintptr_t)__a_barrier_oldkuser
 #define __a_barrier_v6 (uintptr_t)__a_barrier_v6
 #define __a_barrier_v7 (uintptr_t)__a_barrier_v7
+#define __a_barrier_m (uintptr_t)__a_barrier_m
 
 #define __a_cas_kuser 0xffff0fc0
 #define __a_cas_v6 (uintptr_t)__a_cas_v6
 #define __a_cas_v7 (uintptr_t)__a_cas_v7
+#define __a_cas_m (uintptr_t)__a_cas_m
+#define __a_cas_intmask_m (uintptr_t)__a_cas_intmask_m
 
 #define __a_gettp_kuser 0xffff0fe0
 #define __a_gettp_cp15 (uintptr_t)__a_gettp_cp15
+#define __a_gettp_cp15_m (uintptr_t)__a_gettp_cp15_m
 
 extern hidden uintptr_t __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
 
+static long __a_gettp_syscall(void)
+{
+	return __syscall(__ARM_NR_get_tls);
+}
+
 int __set_thread_area(void *p)
 {
-#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
+#if __ARM_ARCH_PROFILE == 'M'
+	__a_cas_ptr = __a_cas_m;
+	__a_barrier_ptr = __a_barrier_m;
+
+	if (__hwcap & HWCAP_TLS) {
+		__a_gettp_ptr = __a_gettp_cp15_m;
+	} else {
+		size_t *aux;
+		__a_gettp_ptr = (uintptr_t)__a_gettp_syscall;
+		for (aux=libc.auxv; *aux; aux+=2) {
+			if (*aux != AT_PLATFORM) continue;
+			const char *s = (void *)aux[1];
+			if (s[0]=='v' && s[1]=='6') {
+				__a_cas_ptr = __a_cas_intmask_m;
+				break;
+			}
+		}
+	}
+#elif !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
 	if (__hwcap & HWCAP_TLS) {
 		size_t *aux;
 		__a_cas_ptr = __a_cas_v7;
diff --git a/src/thread/arm/__unmapself.c b/src/thread/arm/__unmapself.c
new file mode 100644
index 00000000..d22a760f
--- /dev/null
+++ b/src/thread/arm/__unmapself.c
@@ -0,0 +1,21 @@
+#if __ARM_ARCH_PROFILE != 'M'
+
+#include "pthread_impl.h"
+
+void __unmapself(void *base, size_t size)
+{
+	register void *r0 __asm__("r0") = base;
+	register size_t r1 __asm__("r1") = size;
+	__asm__ __volatile__ (
+	"	movs r7,#91 \n"
+	"	svc 0 \n"
+	"	movs r7,#1 \n"
+	"	svc 0 \n"
+	:: "r"(r0), "r"(r1));
+}
+
+#else
+
+#include "../__unmapself.c"
+
+#endif
diff --git a/src/thread/arm/__unmapself.s b/src/thread/arm/__unmapself.s
deleted file mode 100644
index 29c2d07b..00000000
--- a/src/thread/arm/__unmapself.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.syntax unified
-.text
-.global __unmapself
-.type __unmapself,%function
-__unmapself:
-	mov r7,#91
-	svc 0
-	mov r7,#1
-	svc 0
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index da50508d..5fd77ad9 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -11,6 +11,8 @@ __a_barrier_dummy:
 .hidden __a_barrier_oldkuser
 .type __a_barrier_oldkuser,%function
+	.arch armv6
+	.arm
 __a_barrier_oldkuser:
 	push {r0,r1,r2,r3,ip,lr}
 	mov r1,r0
 	mov r2,sp
@@ -25,6 +27,7 @@ __a_barrier_oldkuser:
 .type __a_barrier_v6,%function
+	.arm
 __a_barrier_v6:
 	.arch armv6t2
 	mcr p15,0,r0,c7,c10,5
 	bx lr
 
@@ -33,24 +36,38 @@ __a_barrier_v6:
 .type __a_barrier_v7,%function
+	.arm
 __a_barrier_v7:
 	.arch armv7-a
 	dmb ish
 	bx lr
 
+.global __a_barrier_m
+.hidden __a_barrier_m
+.type __a_barrier_m,%function
+	.thumb
+	.arch armv6-m
+__a_barrier_m:
+	dmb
+	bx lr
+
 .global __a_cas_dummy
 .hidden __a_cas_dummy
 .type __a_cas_dummy,%function
+	.arch armv7-a
+	.arm
 __a_cas_dummy:
 	mov r3,r0
 	ldr r0,[r2]
 	subs r0,r3,r0
-	streq r1,[r2]
-	bx lr
+	bne 1f
+	str r1,[r2]
+1:	bx lr
 
 .global __a_cas_v6
 .hidden __a_cas_v6
 .type __a_cas_v6,%function
+	.arm
 __a_cas_v6:
 	.arch armv6t2
 	mov r3,r0
 	mcr p15,0,r0,c7,c10,5
 1:	ldrex r0,[r2]
@@ -66,6 +83,7 @@ __a_cas_v6:
 .type __a_cas_v7,%function
+	.arm
 __a_cas_v7:
 	.arch armv7-a
 	mov r3,r0
 	dmb ish
 1:	ldrex r0,[r2]
@@ -76,10 +94,55 @@ __a_cas_v7:
 	dmb ish
 	bx lr
 
+.global __a_cas_m
+.hidden __a_cas_m
+.type __a_cas_m,%function
+	.thumb
+	.arch armv7-m
+__a_cas_m:
+	mov r3,r0
+	dmb
+1:	ldrex r0,[r2]
+	subs r0,r3,r0
+	bne 2f
+	strex r0,r1,[r2]
+	tst r0,r0
+	bne 1b
+	dmb
+2:	bx lr
+
+.global __a_cas_intmask_m
+.hidden __a_cas_intmask_m
+.type __a_cas_intmask_m,%function
+	.thumb
+	.arch armv6-m
+__a_cas_intmask_m:
+	mov r3,r0
+	dmb
+	cpsid i
+	ldr r0,[r2]
+	subs r0,r3,r0
+	bne 1f
+	str r1,[r2]
+1:	cpsie i
+	dmb
+	bx lr
+
 .global __a_gettp_cp15
 .hidden __a_gettp_cp15
 .type __a_gettp_cp15,%function
+	.arch armv6
+	.arm
 __a_gettp_cp15:
 	mrc p15,0,r0,c13,c0,3
 	bx lr
+
+.global __a_gettp_cp15_m
+.hidden __a_gettp_cp15_m
+.type __a_gettp_cp15_m,%function
+	.thumb
+	.arch armv7-m
+__a_gettp_cp15_m:
+	mrc p15,0,r0,c13,c0,3
+	bx lr
 
diff --git a/src/thread/arm/clone.s b/src/thread/arm/clone.s
index bb0965da..33c2e59b 100644
--- a/src/thread/arm/clone.s
+++ b/src/thread/arm/clone.s
@@ -4,24 +4,26 @@
 .hidden __clone
 .type __clone,%function
 __clone:
-	stmfd sp!,{r4,r5,r6,r7}
-	mov r7,#120
+	push {r4,r5,r6,r7}
+	movs r7,#120
 	mov r6,r3
 	mov r5,r0
 	mov r0,r2
-	and r1,r1,#-16
+	movs r2,#0
+	subs r2,#16
+	ands r1,r2
 	ldr r2,[sp,#16]
 	ldr r3,[sp,#20]
 	ldr r4,[sp,#24]
 	svc 0
 	tst r0,r0
 	beq 1f
-	ldmfd sp!,{r4,r5,r6,r7}
+	pop {r4,r5,r6,r7}
 	bx lr
 
 1:	mov r0,r6
 	bl 3f
-2:	mov r7,#1
+2:	movs r7,#1
 	svc 0
 	b 2b
 
diff --git a/src/thread/arm/syscall_cp.s b/src/thread/arm/syscall_cp.s
index e607dd42..421e64f4 100644
--- a/src/thread/arm/syscall_cp.s
+++ b/src/thread/arm/syscall_cp.s
@@ -11,7 +11,7 @@
 .type __syscall_cp_asm,%function
 __syscall_cp_asm:
 	mov ip,sp
-	stmfd sp!,{r4,r5,r6,r7}
+	push {r4,r5,r6,r7}
 __cp_begin:
 	ldr r0,[r0]
 	cmp r0,#0
@@ -19,11 +19,12 @@ __cp_begin:
 	mov r7,r1
 	mov r0,r2
 	mov r1,r3
-	ldmfd ip,{r2,r3,r4,r5,r6}
+	mov r2,ip
+	ldmfd r2,{r2,r3,r4,r5,r6}
 	svc 0
 __cp_end:
-	ldmfd sp!,{r4,r5,r6,r7}
+	pop {r4,r5,r6,r7}
 	bx lr
 __cp_cancel:
-	ldmfd sp!,{r4,r5,r6,r7}
+	pop {r4,r5,r6,r7}
 	b __cancel