diff --git a/src/internal/atomic_clang_c11.h b/src/internal/atomic_clang_c11.h new file mode 100644 index 0000000..bb3629d --- /dev/null +++ b/src/internal/atomic_clang_c11.h @@ -0,0 +1,82 @@ +#ifndef _STDATOMIC_CLANG_C11_H_ +#define _STDATOMIC_CLANG_C11_H_ 1 + +#include + +#define ATOMIC_VAR_INIT(...) __VA_ARGS__ +#define atomic_init __c11_atomic_init + +/* Map operations to the special builtins that clang provides. */ + +/* Map all non-explicit macros to the builtin with forced memory order. */ +#define atomic_fetch_add(X, Y) __c11_atomic_fetch_add((X), (Y), memory_order_seq_cst) +#define atomic_fetch_sub(X, Y) __c11_atomic_fetch_sub((X), (Y), memory_order_seq_cst) +#define atomic_fetch_and(X, Y) __c11_atomic_fetch_and((X), (Y), memory_order_seq_cst) +#define atomic_fetch_or(X, Y) __c11_atomic_fetch_or((X), (Y), memory_order_seq_cst) +#define atomic_fetch_xor(X, Y) __c11_atomic_fetch_xor((X), (Y), memory_order_seq_cst) +#define atomic_load(X) __c11_atomic_load((X), memory_order_seq_cst) +#define atomic_store(X, V) __c11_atomic_store((X), (V), memory_order_seq_cst) +#define atomic_exchange(X, V) __c11_atomic_exchange((X), (V), memory_order_seq_cst) +#define atomic_compare_exchange_weak(X, E, V) __c11_atomic_compare_exchange_weak((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_strong(X, E, V) __c11_atomic_compare_exchange_strong((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst) + +/* Map allexplicit macros to the corresponding builtin. */ +#define atomic_fetch_add_explicit __c11_atomic_fetch_add +#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub +#define atomic_fetch_and_explicit __c11_atomic_fetch_and +#define atomic_fetch_or_explicit __c11_atomic_fetch_or +#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor +#define atomic_load_explicit __c11_atomic_load +#define atomic_store_explicit __c11_atomic_store +#define atomic_exchange_explicit __c11_atomic_exchange +#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong +#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak + +#define INSTANTIATE_STUB_LF(N, T) \ +T __impl_fetch_add_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_add(_X, _V, _mo); \ +} \ +T __impl_fetch_sub_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_sub(_X, _V, _mo); \ +} \ +T __impl_fetch_and_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_and(_X, _V, _mo); \ +} \ +T __impl_fetch_xor_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_xor(_X, _V, _mo); \ +} \ +T __impl_fetch_or_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_or(_X, _V, _mo); \ +} \ +T __impl_add_fetch_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_add(_X, _V, _mo) + _V; \ +} \ +T __impl_sub_fetch_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_sub(_X, _V, _mo) - _V; \ +} \ +T __impl_and_fetch_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_and(_X, _V, _mo) & _V; \ +} \ +T __impl_xor_fetch_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_xor(_X, _V, _mo) ^ _V; \ +} \ +T __impl_or_fetch_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_fetch_or(_X, _V, _mo) | _V; \ +} \ +T __impl_load_ ## N(_Atomic(T)* _X, int _mo) { \ + return __c11_atomic_load(_X, _mo); \ +} \ +void __impl_store_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + 
__c11_atomic_store(_X, _V, _mo); \ +} \ +T __impl_exchange_ ## N(_Atomic(T)* _X, T const _V, int _mo) { \ + return __c11_atomic_exchange(_X, _V, _mo); \ +} \ +_Bool __impl_compare_exchange_ ## N(_Atomic(T)* _X, T* _E, T const _V, int _mos, int _mof) { \ + return __c11_atomic_compare_exchange_strong(_X, _E, _V, _mos, _mof); \ +} \ + INSTANTIATE_STUB_NAND(N, T) + +#define INSTANTIATE_STUB(N, T) INSTANTIATE_STUB_ ## N(T) + +#endif diff --git a/src/internal/atomic_constants.h b/src/internal/atomic_constants.h new file mode 100644 index 0000000..327241b --- /dev/null +++ b/src/internal/atomic_constants.h @@ -0,0 +1,162 @@ +#ifndef _STDATOMIC_ATOMIC_CONSTANTS_H_ +#define _STDATOMIC_ATOMIC_CONSTANTS_H_ 1 + +#include + +#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || __GNUC__ < 4 +# error "this implementation of stdatomic need support that is compatible with the gcc ABI" +#endif + +/* gcc 4.7 and 4.8 implement atomic operations but not atomic + types. This test is meant to stay simple, we don't know of any + other compiler that fakes to be gcc 4.[78] or 4.[78].x */ +#if !defined(__ATOMIC_RELAXED) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 +#undef __ATOMIC_FORCE_SYNC +#define __ATOMIC_FORCE_SYNC 1 +#endif + +#ifdef __SIZEOF_INT128__ +# define __UINT128__ 1 +typedef __uint128_t __impl_uint128_t; +#else +# define __UINT128__ 0 +typedef struct { uint64_t a[2]; } __impl_uint128_t; +#endif + +#define __atomic_align(T) \ +(sizeof(T) == 1 ? __alignof__(uint8_t) \ + : (sizeof(T) == 2 ? __alignof__(uint16_t) \ + : (sizeof(T) == 4 ? __alignof__(uint32_t) \ + : ((sizeof(T) == 8) ? __alignof__(uint64_t) \ + : ((sizeof(T) == 16) ? __alignof__(__impl_uint128_t) \ + : __alignof__(T)))))) + +#if __ATOMIC_FORCE_SYNC +/* There is no compiler support for _Atomic type qualification, so we + use the type specifier variant. The idea is to use a one element + array to ensure that such an _Atomic(something) can never be used + in operators. + + Underneath we will use uintXX_t for special cases. To be sure that + no bad things can happen, then, we ensure that the alignment for + these special cases is as wide as possible, namely sizeof the + type. 
*/
+#define _Atomic(T) __typeof__(T volatile[1])
+#define _Atomic_aligned(T) __attribute__ ((__aligned__(__atomic_align(T)))) __typeof__(T[1])
+#endif
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED 0
+#endif
+#ifndef __ATOMIC_CONSUME
+#define __ATOMIC_CONSUME 1
+#endif
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE 2
+#endif
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE 3
+#endif
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL 4
+#endif
+#ifndef __ATOMIC_SEQ_CST
+#define __ATOMIC_SEQ_CST 5
+#endif
+
+enum memory_order {
+	memory_order_relaxed = __ATOMIC_RELAXED,
+	memory_order_consume = __ATOMIC_CONSUME,
+	memory_order_acquire = __ATOMIC_ACQUIRE,
+	memory_order_release = __ATOMIC_RELEASE,
+	memory_order_acq_rel = __ATOMIC_ACQ_REL,
+	memory_order_seq_cst = __ATOMIC_SEQ_CST,
+};
+typedef enum memory_order memory_order;
+
+#ifndef __GCC_ATOMIC_BOOL_LOCK_FREE
+#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+#define __GCC_ATOMIC_SHORT_T_LOCK_FREE 2
+# if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
+# define __GCC_ATOMIC_INT_T_LOCK_FREE 2
+# define __GCC_ATOMIC_LONG_T_LOCK_FREE 2
+# define __GCC_ATOMIC_LLONG_T_LOCK_FREE 2
+# define __GCC_ATOMIC_POINTER_T_LOCK_FREE 2
+# define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+# define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+# define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
+# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
+# define __GCC_ATOMIC_INT_T_LOCK_FREE ((UINT_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_LONG_T_LOCK_FREE ((ULONG_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_LLONG_T_LOCK_FREE ((ULLONG_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_POINTER_T_LOCK_FREE ((UINTPTR_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_CHAR16_T_LOCK_FREE ((UINT_LEAST16_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_CHAR32_T_LOCK_FREE ((UINT_LEAST32_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_WCHAR_T_LOCK_FREE ((WCHAR_MAX <= 0xFFFFFFFFFFFFFFFFU) ? 2 : 0)
+# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
+# define __GCC_ATOMIC_INT_T_LOCK_FREE ((UINT_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_LONG_T_LOCK_FREE ((ULONG_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_LLONG_T_LOCK_FREE ((ULLONG_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_POINTER_T_LOCK_FREE ((UINTPTR_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_CHAR16_T_LOCK_FREE ((UINT_LEAST16_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_CHAR32_T_LOCK_FREE ((UINT_LEAST32_MAX <= 0xFFFFFFFFU) ? 2 : 0)
+# define __GCC_ATOMIC_WCHAR_T_LOCK_FREE ((WCHAR_MAX <= 0xFFFFFFFFU) ?
2 : 0) +# endif +#endif + + +#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +#define ATOMIC_SHORT_T_LOCK_FREE __GCC_ATOMIC_SHORT_T_LOCK_FREE +#define ATOMIC_INT_T_LOCK_FREE __GCC_ATOMIC_INT_T_LOCK_FREE +#define ATOMIC_LONG_T_LOCK_FREE __GCC_ATOMIC_LONG_T_LOCK_FREE +#define ATOMIC_LLONG_T_LOCK_FREE __GCC_ATOMIC_LLONG_T_LOCK_FREE + +#define ATOMIC_POINTER_T_LOCK_FREE __GCC_ATOMIC_POINTER_T_LOCK_FREE + +#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 +# define ATOMIC_UINT8_LOCK_FREE 2 +#else +# define ATOMIC_UINT8_LOCK_FREE 0 +#endif + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 +# define ATOMIC_UINT16_LOCK_FREE 2 +#else +# define ATOMIC_UINT16_LOCK_FREE 0 +#endif + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +# define ATOMIC_UINT32_LOCK_FREE 2 +#else +# define ATOMIC_UINT32_LOCK_FREE 0 +#endif + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +# define ATOMIC_UINT64_LOCK_FREE 2 +#else +# define ATOMIC_UINT64_LOCK_FREE 0 +#endif + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 +# define ATOMIC_UINT128_LOCK_FREE 2 +#else +# define ATOMIC_UINT128_LOCK_FREE 0 +#endif + + +#define atomic_is_lock_free(O) \ +(sizeof*(O) == 1 ? ATOMIC_UINT8_LOCK_FREE \ + : (sizeof*(O) == 2 ? ATOMIC_UINT16_LOCK_FREE \ + : (sizeof*(O) == 4 ? ATOMIC_UINT32_LOCK_FREE \ + : ((sizeof*(O) == 8) ? ATOMIC_UINT64_LOCK_FREE \ + : ((sizeof*(O) == 16) ? ATOMIC_UINT128_LOCK_FREE \ + : 0))))) + + +#endif diff --git a/src/internal/atomic_fence.h b/src/internal/atomic_fence.h new file mode 100644 index 0000000..162507a --- /dev/null +++ b/src/internal/atomic_fence.h @@ -0,0 +1,29 @@ +#ifndef _STDATOMIC_ATOMIC_FENCE_H_ +#define _STDATOMIC_ATOMIC_FENCE_H_ 1 + +#include + + +void atomic_thread_fence(memory_order mo); + +void atomic_signal_fence(memory_order mo); + +#define kill_dependency(X) \ +({ \ + register __typeof__(X) kill_dependency = (X); \ + kill_dependency; \ + }) + +#ifndef __ATOMIC_FORCE_SYNC +# define atomic_thread_fence(MO) __atomic_thread_fence(MO) +# define atomic_signal_fence(MO) __atomic_signal_fence(MO) +#else +# define atomic_thread_fence(MO) \ +({ \ + if (MO != memory_order_relaxed) __sync_synchronize(); \ + else __asm__ volatile("# relaxed fence"); \ + }) +#define atomic_signal_fence(MO) __asm__ volatile("# signal fence") +#endif + +#endif diff --git a/src/internal/atomic_flag.h b/src/internal/atomic_flag.h new file mode 100644 index 0000000..0d3344b --- /dev/null +++ b/src/internal/atomic_flag.h @@ -0,0 +1,47 @@ +#ifndef _STDATOMIC_ATOMIC_FLAG_H_ +#define _STDATOMIC_ATOMIC_FLAG_H_ 1 + +#include + +#ifndef __GCC_ATOMIC_TEST_AND_SET_TRUEVAL +# define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +#endif + +typedef struct atomic_flag atomic_flag; +struct atomic_flag { + _Bool f; +}; + +_Bool atomic_flag_test_and_set(volatile atomic_flag*); +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag*, memory_order); +void atomic_flag_clear(volatile atomic_flag*); +void atomic_flag_clear_explicit(volatile atomic_flag*, memory_order); + +#define ATOMIC_FLAG_INIT { .f = 0, } + +#define atomic_flag_test_and_set(A) atomic_flag_test_and_set_explicit((A), memory_order_seq_cst) +#define atomic_flag_clear(A) atomic_flag_clear_explicit((A), memory_order_seq_cst) + +#ifndef __ATOMIC_FORCE_SYNC +# define atomic_flag_test_and_set_explicit(A, MO) (__atomic_test_and_set(&((A)->f), MO) == 
__GCC_ATOMIC_TEST_AND_SET_TRUEVAL) +# define atomic_flag_clear_explicit(A, MO) __atomic_clear(&(A)->f, MO) +#else +# define atomic_flag_test_and_set_explicit(A, O) \ +({ \ + register _Bool atomic_flag_test_and_set_explicit \ + = (__sync_lock_test_and_set(&(A)->f, __GCC_ATOMIC_TEST_AND_SET_TRUEVAL) == __GCC_ATOMIC_TEST_AND_SET_TRUEVAL); \ + /* gcc guarantees that this was an acquire operation. */ \ + /* synchronize even stronger if we need to */ \ + if ((O) == memory_order_seq_cst) __sync_synchronize(); \ + atomic_flag_test_and_set_explicit; \ + }) +# define atomic_flag_clear_explicit(A, O) \ +({ \ + /* gcc guarantees that this will be a release operation. */ \ + /* synchronize even stronger if we need to */ \ + if ((O) == memory_order_seq_cst) __sync_synchronize(); \ + __sync_lock_release(&(A)->f); \ + }) +#endif + +#endif diff --git a/src/internal/atomic_gcc_atomic.h b/src/internal/atomic_gcc_atomic.h new file mode 100644 index 0000000..e030b85 --- /dev/null +++ b/src/internal/atomic_gcc_atomic.h @@ -0,0 +1,107 @@ +#ifndef _STDATOMIC_GCC_ATOMIC_H_ +#define _STDATOMIC_GCC_ATOMIC_H_ 1 + +#include + +#define ATOMIC_VAR_INIT(...) __VA_ARGS__ +#define atomic_init(X, V) ((void)((*(X))=(V))) + +/* Map all non-explicit macros to the explicit version. */ +#define atomic_fetch_add(X, Y) atomic_fetch_add_explicit((X), (Y), memory_order_seq_cst) +#define atomic_fetch_sub(X, Y) atomic_fetch_sub_explicit((X), (Y), memory_order_seq_cst) +#define atomic_fetch_and(X, Y) atomic_fetch_and_explicit((X), (Y), memory_order_seq_cst) +#define atomic_fetch_or(X, Y) atomic_fetch_or_explicit((X), (Y), memory_order_seq_cst) +#define atomic_fetch_xor(X, Y) atomic_fetch_xor_explicit((X), (Y), memory_order_seq_cst) +#define atomic_load(X) atomic_load_explicit((X), memory_order_seq_cst) +#define atomic_store(X, V) atomic_store_explicit((X), (V), memory_order_seq_cst) +#define atomic_exchange(X, V) atomic_exchange_explicit((X), (V), memory_order_seq_cst) +#define atomic_compare_exchange_weak(X, E, V) atomic_compare_exchange_weak_explicit((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_strong(X, E, V) atomic_compare_exchange_strong_explicit((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst) + +/* Map allexplicit macros to the corresponding builtin. */ +/* The arithmetic operations don't have to use a memory operand. */ +#define atomic_fetch_add_explicit(X, Y, MO) __atomic_fetch_add((X), (Y), (MO)) +#define atomic_fetch_sub_explicit(X, Y, MO) __atomic_fetch_sub((X), (Y), (MO)) +#define atomic_fetch_and_explicit(X, Y, MO) __atomic_fetch_and((X), (Y), (MO)) +#define atomic_fetch_or_explicit(X, Y, MO) __atomic_fetch_or((X), (Y), (MO)) +#define atomic_fetch_xor_explicit(X, Y, MO) __atomic_fetch_xor((X), (Y), (MO)) + +/* The interfaces for the universal functions need to operate on + memory operands, only. 
*/ + +#define atomic_load_explicit(X, MO) \ +({ \ + __atyp(*X) _r; \ + __atomic_load((X), _r, (MO)); \ + __aret(_r[0]); \ + }) + +#define atomic_store_explicit(X, V, MO) \ + __atomic_store((X), &__atmp(*X, V), (MO)) + +#define atomic_exchange_explicit(X, V, MO) \ +({ \ + __atyp(*X) _r; \ + __atomic_exchange((X), &__atmp(*X, V), _r, (MO)); \ + __aret(_r[0]); \ + }) + +#define atomic_compare_exchange_weak_explicit(X, E, V, MOS, MOF) \ + __atomic_compare_exchange((X), (E), &__atmp(*(X), (V)), 1, (MOS), (MOF)) + +#define atomic_compare_exchange_strong_explicit(X, E, V, MOS, MOF) \ + __atomic_compare_exchange((X), (E), &__atmp(*(X), (V)), 0, (MOS), (MOF)) + + +#define INSTANTIATE_STUB_LF(N, T) \ +T __impl_fetch_add_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_add(X, V, M); \ +} \ +T __impl_fetch_sub_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_sub(X, V, M); \ +} \ +T __impl_fetch_and_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_and(X, V, M); \ +} \ +T __impl_fetch_xor_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_xor(X, V, M); \ +} \ +T __impl_fetch_nand_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_nand(X, V, M); \ +} \ +T __impl_fetch_or_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_fetch_or(X, V, M); \ +} \ +T __impl_add_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_add_fetch(X, V, M); \ +} \ +T __impl_sub_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_sub_fetch(X, V, M); \ +} \ +T __impl_and_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_and_fetch(X, V, M); \ +} \ +T __impl_xor_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_xor_fetch(X, V, M); \ +} \ +T __impl_nand_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_nand_fetch(X, V, M); \ +} \ +T __impl_or_fetch_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_or_fetch(X, V, M); \ +} \ +T __impl_load_ ## N(_Atomic(T)* X, int M) { \ + return __atomic_load_n(X, M); \ +} \ +void __impl_store_ ## N(_Atomic(T)* X, T const V, int M) { \ + __atomic_store_n(X, V, M); \ +} \ +T __impl_exchange_ ## N(_Atomic(T)* X, T const V, int M) { \ + return __atomic_exchange_n(X, V, M); \ +} \ +_Bool __impl_compare_exchange_ ## N(_Atomic(T)* X, T* E, T const V, int MS, int MF) { \ + return __atomic_compare_exchange_n(X, E, V, 0, MS, MF); \ +} + + +#endif diff --git a/src/internal/atomic_gcc_sync.h b/src/internal/atomic_gcc_sync.h new file mode 100644 index 0000000..2d42a57 --- /dev/null +++ b/src/internal/atomic_gcc_sync.h @@ -0,0 +1,250 @@ +#ifndef _STDATOMIC_GCC_SYNC_H_ +#define _STDATOMIC_GCC_SYNC_H_ 1 + +#define ATOMIC_VAR_INIT(...) { [0] = __VA_ARGS__, } +#define atomic_init(X, V) ((void)((*(X))[0]=(V))) + +/* Map all non-explicit macros to the explicit version. 
*/
+#define atomic_fetch_add(X, Y) atomic_fetch_add_explicit((X), (Y), memory_order_seq_cst)
+#define atomic_fetch_sub(X, Y) atomic_fetch_sub_explicit((X), (Y), memory_order_seq_cst)
+#define atomic_fetch_and(X, Y) atomic_fetch_and_explicit((X), (Y), memory_order_seq_cst)
+#define atomic_fetch_or(X, Y) atomic_fetch_or_explicit((X), (Y), memory_order_seq_cst)
+#define atomic_fetch_xor(X, Y) atomic_fetch_xor_explicit((X), (Y), memory_order_seq_cst)
+#define atomic_load(X) atomic_load_explicit((X), memory_order_seq_cst)
+#define atomic_store(X, V) atomic_store_explicit((X), (V), memory_order_seq_cst)
+#define atomic_exchange(X, V) atomic_exchange_explicit((X), (V), memory_order_seq_cst)
+#define atomic_compare_exchange_weak(X, E, V) atomic_compare_exchange_strong_explicit((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst)
+#define atomic_compare_exchange_strong(X, E, V) atomic_compare_exchange_strong_explicit((X), (E), (V), memory_order_seq_cst, memory_order_seq_cst)
+
+/* The argument X is supposed to be a pointer to a one element array of
+   the base type. In evaluation context ``*(X)'' decays to a pointer
+   to the base type. In __typeof__ context we have to use
+   ``&(*(X))[0]'' for that. */
+#define atomic_fetch_add_explicit(X, Y, MO) __sync_fetch_and_add(*(X), (Y))
+#define atomic_fetch_sub_explicit(X, Y, MO) __sync_fetch_and_sub(*(X), (Y))
+#define atomic_fetch_and_explicit(X, Y, MO) __sync_fetch_and_and(*(X), (Y))
+#define atomic_fetch_or_explicit(X, Y, MO) __sync_fetch_and_or(*(X), (Y))
+#define atomic_fetch_xor_explicit(X, Y, MO) __sync_fetch_and_xor(*(X), (Y))
+
+#define atomic_compare_exchange_weak_explicit(X, E, D, MOS, MOF) atomic_compare_exchange_strong_explicit((X), (E), (D), (MOS), (MOF))
+
+#define INSTANTIATE_STUB_LF(N, T) \
+T __impl_fetch_add_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_fetch_and_add(&((*X)[0]), _V); \
+} \
+T __impl_fetch_sub_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_fetch_and_sub(&((*X)[0]), _V); \
+} \
+T __impl_fetch_and_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_fetch_and_and(&((*X)[0]), _V); \
+} \
+T __impl_fetch_xor_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_fetch_and_xor(&((*X)[0]), _V); \
+} \
+T __impl_fetch_or_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_fetch_and_or(&((*X)[0]), _V); \
+} \
+T __impl_add_fetch_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_add_and_fetch(&((*X)[0]), _V); \
+} \
+T __impl_sub_fetch_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_sub_and_fetch(&((*X)[0]), _V); \
+} \
+T __impl_and_fetch_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_and_and_fetch(&((*X)[0]), _V); \
+} \
+T __impl_xor_fetch_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_xor_and_fetch(&((*X)[0]), _V); \
+} \
+T __impl_or_fetch_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	return __sync_or_and_fetch(&((*X)[0]), _V); \
+} \
+T __impl_load_ ## N(__typeof__(T volatile[1])* X, int _mo) { \
+	return __sync_val_compare_and_swap(&((*X)[0]), 0, 0); \
+} \
+T __impl_exchange_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \
+	T _r = _V, _e; \
+	do { \
+		_e = _r; \
+		_r = __sync_val_compare_and_swap(&((*X)[0]), _e, _V); \
+	} while (_r != _e); \
+	return _r; \
+} \
+void __impl_store_ ## N(__typeof__(T volatile[1])* X, T const _V, int _mo) { \ +
(void)__impl_exchange_ ## N(X, _V, _mo); \ +} \ +_Bool __impl_compare_exchange_ ## N(__typeof__(T volatile[1])* X, T* _E, T const _D, int _mos, int _mof) { \ + T _v = *_E; \ + T _n = __sync_val_compare_and_swap(&((*X)[0]), _v, _D); \ + if (_v != _n) { \ + *_E = _n; \ + return 0; \ + } \ + return 1; \ +} \ + INSTANTIATE_STUB_NAND(N, T) + + +#define atomic_compare_exchange_strong_explicit(X, E, D, MOS, MOF) \ +({ \ + _Bool ret; \ + __typeof__((*X)[0])* _e = (E); \ + __typeof__((*X)[0]) const _d = (D); \ + switch (sizeof _d) { \ + case 8: ret = __sync_val_compare_and_swap((uint64_t*)(X), *_e, (D)); break; \ + case 4: ret = __sync_val_compare_and_swap((uint32_t*)(X), *_e, (D)); break; \ + case 2: ret = __sync_val_compare_and_swap((uint16_t*)(X), *_e, (D)); break; \ + case 1: ret = __sync_val_compare_and_swap((uint8_t*)(X), *_e, (D)); break; \ + default: ret = __impl_compare_exchange(sizeof (*X), (void*)(X), _e, &_d, MOS, MOS); \ + } \ + __aret(ret); \ + }) + +#define __impl_union(T, X) union { __typeof__(*(X)) x; T t; } +#define __impl_union2T(T, X) (((__impl_union(T, X)){ .x = (*(X)), }).t) +#define __impl_union2X(T, X, V) (((__impl_union(T, X)){ .t = (V), }).x) + +#define __impl_load_union(T, X) \ +__impl_union2X(T, X, __sync_val_compare_and_swap((T*)X, 0, 0)) + +#define __impl_exchange_union(T, X, V) \ +({ \ + __impl_union(T, X) _V = { .t = (V), }; \ + T _r = _V.t, _e; \ + do { \ + _e = _r; \ + _r = __sync_val_compare_and_swap((T*)X, _e, _V.t); \ + } while (_r != _e); \ + __impl_union2X(T, X, _r); \ + }) + +#define __impl_store_union(T, X, V) \ +({ \ + __impl_union(T, X) _V = { .t = (V), }; \ + T _r = _V.t, _e; \ + do { \ + _e = _r; \ + _r = __sync_val_compare_and_swap((T*)X, _e, _V.t); \ + } while (_r != _e); \ + }) + +#define __impl_compare_exchange_union(T, X, E, V) \ +({ \ + __typeof__(*E)* _e = (E); \ + __impl_union(T, X) _V = { .x = (V), }; \ + __impl_union(T, X) _E = { .x = *_e, }; \ + __impl_union(T, X) _R = { .t = __sync_val_compare_and_swap((T*)X, _E.t, _V.t), }; \ + _Bool _r = (_E.t == _R.t); \ + if (!_r) _E.x = _R.x; \ + _r; \ + }) + +#define atomic_load_explicit(X, MO) \ +__builtin_choose_expr \ +( \ + __UINT128__ && sizeof(*X)==16, \ + __impl_load_union(__impl_uint128_t, &((*X)[0])), \ +__builtin_choose_expr \ +( \ + sizeof(*X)==8, \ + __impl_load_union(uint64_t, &((*X)[0])), \ +__builtin_choose_expr \ +( \ + sizeof(*X)==4, \ + __impl_load_union(uint32_t, &((*X)[0])), \ + __builtin_choose_expr \ +( \ + sizeof(*X)==2, \ + __impl_load_union(uint16_t, &((*X)[0])), \ + __builtin_choose_expr \ +( \ + sizeof(*X)==1, \ + __impl_load_union(uint8_t, &((*X)[0])), \ + ({ \ + __typeof__((*X)[0]) _r; \ + __impl_load(sizeof _r, (void*)(&((*X)[0])), &_r, MO); \ + _r; \ + })))))) + +#define atomic_store_explicit(X, V, MO) \ +__builtin_choose_expr \ +( \ + __UINT128__ && sizeof(*X)==16, \ + __impl_store_union(__impl_uint128_t, &((*X)[0]), (V)), \ +__builtin_choose_expr \ +( \ + sizeof(*X)==8, \ + __impl_store_union(uint64_t, &((*X)[0]), (V)), \ +__builtin_choose_expr \ +( \ + sizeof(*X)==4, \ + __impl_store_union(uint32_t, &((*X)[0]), (V)), \ + __builtin_choose_expr \ +( \ + sizeof(*X)==2, \ + __impl_store_union(uint16_t, &((*X)[0]), (V)), \ + __builtin_choose_expr \ +( \ + sizeof(*X)==1, \ + __impl_store_union(uint8_t, &((*X)[0]), (V)), \ + ({ \ + __typeof__((*X)[0]) const _v = (V); \ + __impl_store(sizeof _v, &((*X)[0]), &_v, MO); \ + })))))) + +#define atomic_exchange_explicit(X, V, MO) \ +__builtin_choose_expr \ +( \ + __UINT128__ && sizeof(*X)==16, \ + 
__impl_exchange_union(__impl_uint128_t, &((*(X))[0]), (V)), \
+__builtin_choose_expr \
+( \
+ sizeof(*X)==8, \
+ __impl_exchange_union(uint64_t, &((*(X))[0]), (V)), \
+__builtin_choose_expr \
+( \
+ sizeof(*X)==4, \
+ __impl_exchange_union(uint32_t, &((*(X))[0]), (V)), \
+ __builtin_choose_expr \
+( \
+ sizeof(*X)==2, \
+ __impl_exchange_union(uint16_t, &((*(X))[0]), (V)), \
+ __builtin_choose_expr \
+( \
+ sizeof(*X)==1, \
+ __impl_exchange_union(uint8_t, &((*(X))[0]), (V)), \
+ ({ \
+ __typeof__((*X)[0]) const _v = (V); \
+ __typeof__((*X)[0]) _r = (V); \
+ __impl_exchange(sizeof _r, (&((*X)[0])), &_v, &_r, MO); \
+ _r; \
+ }))))))
+
+#define atomic_compare_exchange_explicit(X, E, V, MOS, MOF) \
+__builtin_choose_expr \
+( \
+ __UINT128__ && sizeof(*X)==16, \
+ __impl_compare_exchange_union(__impl_uint128_t, &((*(X))[0]), (E), (V)), \
+__builtin_choose_expr \
+( \
+ sizeof(*X)==8, \
+ __impl_compare_exchange_union(uint64_t, &((*(X))[0]), (E), (V)), \
+__builtin_choose_expr \
+( \
+ sizeof(*X)==4, \
+ __impl_compare_exchange_union(uint32_t, &((*(X))[0]), (E), (V)), \
+ __builtin_choose_expr \
+( \
+ sizeof(*X)==2, \
+ __impl_compare_exchange_union(uint16_t, &((*(X))[0]), (E), (V)), \
+ __builtin_choose_expr \
+( \
+ sizeof(*X)==1, \
+ __impl_compare_exchange_union(uint8_t, &((*(X))[0]), (E), (V)), \
+ ({ \
+ __typeof__((*X)[0])* _e = (E); \
+ __typeof__((*X)[0]) const _v = (V); \
+ __impl_compare_exchange(sizeof _v, (&((*X)[0])), _e, &_v, MOS, MOF); \
+ }))))))
+
+#endif
diff --git a/src/internal/atomic_generic.h b/src/internal/atomic_generic.h
new file mode 100644
index 0000000..6ec6bb8
--- /dev/null
+++ b/src/internal/atomic_generic.h
@@ -0,0 +1,10 @@
+#ifndef _STDATOMIC_ATOMIC_GENERIC_H_
+#define _STDATOMIC_ATOMIC_GENERIC_H_ 1
+
+void __impl_load (size_t size, void volatile* ptr, void volatile* ret, int mo);
+void __impl_store (size_t size, void volatile* ptr, void const volatile* val, int mo);
+void __impl_exchange (size_t size, void volatile*__restrict__ ptr, void const volatile* val, void volatile* ret, int mo);
+_Bool __impl_compare_exchange (size_t size, void volatile* ptr, void volatile* expected, void const volatile* desired, int mos, int mof);
+void __impl_print_stat(void);
+
+#endif
diff --git a/src/internal/atomic_stub.h b/src/internal/atomic_stub.h
new file mode 100644
index 0000000..c5e3329
--- /dev/null
+++ b/src/internal/atomic_stub.h
@@ -0,0 +1,208 @@
+#ifndef _STDATOMIC_STUB_H_
+#define _STDATOMIC_STUB_H_ 1
+
+#include
+
+#define INSTANTIATE_STUB_LCA(N, T) \
+T __impl_fetch_add_ ## N(void volatile* X, T const V, int MO) { \
+	T E = 0; \
+	T R = V; \
+	int mof = (MO == memory_order_relaxed \
+		? memory_order_relaxed \
+		: memory_order_consume); \
+	while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \
+		R = E + V; \
+	} \
+	return E; \
+} \
+T __impl_fetch_sub_ ## N(void volatile* X, T const V, int MO) { \
+	T E = 0; \
+	T R = -V; \
+	int mof = (MO == memory_order_relaxed \
+		? memory_order_relaxed \
+		: memory_order_consume); \
+	while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \
+		R = E - V; \
+	} \
+	return E; \
+} \
+T __impl_fetch_and_ ## N(void volatile* X, T const V, int MO) { \
+	T E = 0; \
+	T R = 0; \
+	int mof = (MO == memory_order_relaxed \
+		? memory_order_relaxed \
+		: memory_order_consume); \
+	while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \
+		R = E & V; \
+	} \
+	return E; \
+} \
+T __impl_fetch_xor_ ## N(void volatile* X, T const V, int MO) { \
+	T E = 0; \
+	T R = V; \
+	int mof = (MO == memory_order_relaxed \
+		?
memory_order_relaxed \ + : memory_order_consume); \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E ^ V; \ + } \ + return E; \ +} \ +T __impl_fetch_or_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = V; \ + int mof = MO == memory_order_relaxed ? memory_order_relaxed : memory_order_consume; \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E | V; \ + } \ + return E; \ +} \ +T __impl_add_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = V; \ + int mof = (MO == memory_order_relaxed \ + ? memory_order_relaxed \ + : memory_order_consume); \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E + V; \ + } \ + return R; \ +} \ +T __impl_sub_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = -V; \ + int mof = (MO == memory_order_relaxed \ + ? memory_order_relaxed \ + : memory_order_consume); \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E - V; \ + } \ + return R; \ +} \ +T __impl_and_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = 0; \ + int mof = (MO == memory_order_relaxed \ + ? memory_order_relaxed \ + : memory_order_consume); \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E & V; \ + } \ + return R; \ +} \ +T __impl_xor_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = V; \ + int mof = (MO == memory_order_relaxed \ + ? memory_order_relaxed \ + : memory_order_consume); \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E ^ V; \ + } \ + return R; \ +} \ +T __impl_or_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = V; \ + int mof = MO == memory_order_relaxed ? memory_order_relaxed : memory_order_consume; \ + while (!__impl_compare_exchange(N, X, &E, &R, MO, mof)){ \ + R = E | V; \ + } \ + return R; \ +} + +#define INSTANTIATE_STUB_NAND(N, T) \ +T __impl_fetch_nand_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = ~0; \ + int mof = (MO == memory_order_relaxed \ + ? memory_order_relaxed \ + : memory_order_consume); \ + while (!atomic_compare_exchange_strong_explicit((_Atomic(T)*)X, &E, R, MO, mof)){ \ + R = ~(E & V); \ + } \ + return E; \ +} \ +T __impl_nand_fetch_ ## N(void volatile* X, T const V, int MO) { \ + T E = 0; \ + T R = ~E; \ + int mof = (MO == memory_order_relaxed \ + ? 
memory_order_relaxed \ + : memory_order_consume); \ + while (!atomic_compare_exchange_strong_explicit((_Atomic(T)*)X, &E, R, MO, mof)){ \ + R = ~(E & V); \ + } \ + return R; \ +} + + +#define INSTANTIATE_STUB_LCM(N, T) \ +T __impl_load_ ## N(void volatile* X, int MO) { \ + T ret; \ + __impl_load(N, X, &ret, MO); \ + return ret; \ +} \ +void __impl_store_ ## N(void volatile* X, T const V, int MO) { \ + __impl_store(N, X, &V, MO); \ +} \ +T __impl_exchange_ ## N(void volatile* X, T const V, int MO) { \ + T ret; \ + __impl_exchange(N, X, &V, &ret, MO); \ + return ret; \ +} \ +_Bool __impl_compare_exchange_ ## N(void volatile* X, T* E, T const V, int MOS, int MOf) { \ + return __impl_compare_exchange(N, X, E, &V, MOS, MOf); \ +} \ + INSTANTIATE_STUB_NAND(N, T) + +#define INSTANTIATE_STUB_LC(N, T) INSTANTIATE_STUB_LCA(N, T) INSTANTIATE_STUB_LCM(N, T) + + +#define INSTANTIATE_SYNCA(N, T) \ +T __impl_fetch_and_add_ ## N(void volatile* X, T const V) { \ + return __impl_fetch_add_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_fetch_and_sub_ ## N(void volatile* X, T const V) { \ + return __impl_fetch_sub_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_fetch_and_and_ ## N(void volatile* X, T const V) { \ + return __impl_fetch_and_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_fetch_and_or_ ## N(void volatile* X, T const V) { \ + return __impl_fetch_or_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_fetch_and_xor_ ## N(void volatile* X, T const V) { \ + return __impl_fetch_xor_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_add_and_fetch_ ## N(void volatile* X, T const V) { \ + return __impl_add_fetch_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_sub_and_fetch_ ## N(void volatile* X, T const V) { \ + return __impl_sub_fetch_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_and_and_fetch_ ## N(void volatile* X, T const V) { \ + return __impl_and_fetch_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_or_and_fetch_ ## N(void volatile* X, T const V) { \ + return __impl_or_fetch_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} \ +T __impl_xor_and_fetch_ ## N(void volatile* X, T const V) { \ + return __impl_xor_fetch_ ## N((_Atomic(T)*)X, V, memory_order_seq_cst); \ +} + +#define INSTANTIATE_SYNCM(N, T) \ +_Bool __impl_bool_compare_and_swap_ ## N(void volatile* X, T E, T const V) { \ + T R = E; \ + return __impl_compare_exchange_ ## N((_Atomic(T)*)X, &R, V, \ + memory_order_seq_cst, memory_order_seq_cst); \ +} \ +T __impl_val_compare_and_swap_ ## N(void volatile* X, T E, T const V) { \ + T R = E; \ + __impl_compare_exchange_ ## N((_Atomic(T)*)X, &R, V, \ + memory_order_seq_cst, memory_order_seq_cst); \ + return R; \ +} + +#define INSTANTIATE_SYNC(N, T) INSTANTIATE_SYNCA(N, T) INSTANTIATE_SYNCM(N, T) + +#endif diff --git a/src/internal/atomic_types.h b/src/internal/atomic_types.h new file mode 100644 index 0000000..e873f35 --- /dev/null +++ b/src/internal/atomic_types.h @@ -0,0 +1,46 @@ +#ifndef _STDATOMIC_TYPES_H_ +#define _STDATOMIC_TYPES_ 1 + +#include +#include +#include + +typedef _Atomic(_Bool) atomic_bool; +typedef _Atomic(char) atomic_char; +typedef _Atomic(int) atomic_int; +typedef _Atomic(int_fast16_t) atomic_int_fast16_t; +typedef _Atomic(int_fast32_t) atomic_int_fast32_t; +typedef _Atomic(int_fast64_t) atomic_int_fast64_t; +typedef _Atomic(int_fast8_t) atomic_int_fast8_t; +typedef _Atomic(int_least16_t) atomic_int_least16_t; +typedef _Atomic(int_least32_t) 
atomic_int_least32_t;
+typedef _Atomic(int_least64_t) atomic_int_least64_t;
+typedef _Atomic(int_least8_t) atomic_int_least8_t;
+typedef _Atomic(intmax_t) atomic_intmax_t;
+typedef _Atomic(intptr_t) atomic_intptr_t;
+typedef _Atomic(long long) atomic_llong;
+typedef _Atomic(long) atomic_long;
+typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;
+typedef _Atomic(short) atomic_short;
+typedef _Atomic(signed char) atomic_schar;
+typedef _Atomic(size_t) atomic_size_t;
+typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;
+typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;
+typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;
+typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;
+typedef _Atomic(uint_least16_t) atomic_char16_t;
+typedef _Atomic(uint_least16_t) atomic_uint_least16_t;
+typedef _Atomic(uint_least32_t) atomic_char32_t;
+typedef _Atomic(uint_least32_t) atomic_uint_least32_t;
+typedef _Atomic(uint_least64_t) atomic_uint_least64_t;
+typedef _Atomic(uint_least8_t) atomic_uint_least8_t;
+typedef _Atomic(uintmax_t) atomic_uintmax_t;
+typedef _Atomic(uintptr_t) atomic_uintptr_t;
+typedef _Atomic(unsigned char) atomic_uchar;
+typedef _Atomic(unsigned int) atomic_uint;
+typedef _Atomic(unsigned long long) atomic_ullong;
+typedef _Atomic(unsigned long) atomic_ulong;
+typedef _Atomic(unsigned short) atomic_ushort;
+typedef _Atomic(wchar_t) atomic_wchar_t;
+
+#endif
diff --git a/src/internal/stdatomic-impl.h b/src/internal/stdatomic-impl.h
new file mode 100644
index 0000000..7aaa0d5
--- /dev/null
+++ b/src/internal/stdatomic-impl.h
@@ -0,0 +1,75 @@
+#ifndef _STDATOMIC_H_
+#define _STDATOMIC_H_ 1
+
+/* Copyright 2015, Jens Gustedt, France. */
+
+/**
+ ** @file
+ **
+ ** @brief A realization of the stdatomic.h interface by means of gcc
+ ** or clang compiler extensions.
+ **
+ ** This has three different realizations, using intrinsics for modern
+ ** clang (__c11_atomic ...), modern gcc (__atomic ...) or for the old
+ ** gcc __sync interface. The latter should be available on a lot of
+ ** platforms; many other compilers, including clang, implement these
+ ** interfaces.
+ **
+ ** For the first two, user code should be able to use all C11 atomic
+ ** features without problems.
+ **
+ ** For the __sync interface, we can't assume that there is support
+ ** for operators on atomics, so such code should simply not use
+ ** them. But the "functional" approach to atomics should work even
+ ** then. That is, code that uses the _Atomic() variant to declare
+ ** atomic objects and only uses the atomic_... macros of the C11
+ ** standard to act upon these objects should work.
+ **
+ ** The sync code also depends on a lot of other gcc extensions to C:
+ **
+ ** - compound expressions
+ ** - __typeof__
+ ** - __alignof__
+ ** - __attribute__((aligned(something)))
+ **/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* In some places we need a type that is almost the same as base type
+   T, but
+
+   - returns a pointer to T in evaluation context
+   - can't be assigned to
+
+   T can be a type or an expression.
+*/
+#define __atyp(T) __typeof__(__typeof__(T)[1])
+
+/* To evaluate expressions we sometimes need temporaries of that type
+   with a certain value. */
+#define __atmp(T, V) (__atyp(T)){ [0] = (V), }
+
+/* When evaluating lvalues in gcc's compound expressions to return a
+   value, we want to take care that these lvalues can't accidentally
+   be subject to the & operator. Force it to be an rvalue. */
+#define __aret(V) (1 ?
(V) : (V)) + + +#ifdef __ATOMIC_FORCE_SYNC +#include +#elif defined(__clang__) +#include +#else +#include +#endif + +#include + +#endif diff --git a/src/stdatomic/atomic_fence.c b/src/stdatomic/atomic_fence.c new file mode 100644 index 0000000..996286f --- /dev/null +++ b/src/stdatomic/atomic_fence.c @@ -0,0 +1,9 @@ +#include "atomic_fence.h" + +void (atomic_thread_fence)(memory_order mo) { + atomic_thread_fence(mo); +} + +void (atomic_signal_fence)(memory_order mo) { + atomic_signal_fence(mo); +} diff --git a/src/stdatomic/atomic_flag.c b/src/stdatomic/atomic_flag.c new file mode 100644 index 0000000..a27fbe3 --- /dev/null +++ b/src/stdatomic/atomic_flag.c @@ -0,0 +1,17 @@ +#include "atomic_flag.h" + +_Bool (atomic_flag_test_and_set)(volatile atomic_flag* f) { + return atomic_flag_test_and_set(f); +} + +_Bool (atomic_flag_test_and_set_explicit)(volatile atomic_flag* f, memory_order mo) { + return atomic_flag_test_and_set_explicit(f, mo); +} + +void (atomic_flag_clear)(volatile atomic_flag* f) { + atomic_flag_clear(f); +} + +void (atomic_flag_clear_explicit)(volatile atomic_flag* f, memory_order mo) { + atomic_flag_clear_explicit(f, mo); +} diff --git a/src/stdatomic/atomic_futex_lock.c b/src/stdatomic/atomic_futex_lock.c new file mode 100644 index 0000000..ded9eb0 --- /dev/null +++ b/src/stdatomic/atomic_futex_lock.c @@ -0,0 +1,88 @@ +#include "pthread_impl.h" +#include "stdatomic-impl.h" + +/* The HO bit. */ +static unsigned const lockbit = (UINT_MAX/2u)+1u; +static unsigned const contrib = (UINT_MAX/2u)+2u; + +size_t __impl_total = 0; +size_t __impl_fast = 0; +size_t __impl_slow = 0; +size_t __impl_futex = 0; +size_t __impl_again = 0; +size_t __impl_spin = 0; + +#ifdef BENCH +# define ACCOUNT(X, V) (X) += (V) +#else +# define ACCOUNT(X, V) do { } while(0) +#endif + +void __impl_mut_lock_slow(_Atomic(unsigned)* loc) +{ +#ifdef BENCH + size_t slow = 0; + size_t futex = 0; + size_t again = 0; + size_t spin = 0; +#endif + register unsigned spins = 0; + unsigned val = 1+atomic_fetch_add_explicit(loc, 1, memory_order_relaxed); + if (!(val & lockbit)) goto BIT_UNSET; + /* The lock acquisition loop. This has been designed such that the + only possible change that is done inside that loop is setting + the lock bit. This has a double objective. First all atomic + operations are expensive and doing a pair of ++ and -- inside + the loop would just waste memory bandwidth. Then, less changes + to the count, means that other threads that are inside this + same loop are less perturbed. */ + for (;;) { + /* The lock bit is set by someone else, wait until it is + unset. */ + for (spins = 0; spins < 10; ++spins) { + a_spin(); + /* be optimistic and hope that the lock has been released */ + register unsigned des = val-1; + val -= contrib; + if (atomic_compare_exchange_strong_explicit(loc, &val, des, memory_order_acq_rel, memory_order_consume)) + goto FINISH; + if (!(val & lockbit)) goto BIT_UNSET; + } + /* The same inner loop as before, but with futex wait instead of + a_spin. */ + for (;;) { + ACCOUNT(futex, 1); + if (__syscall(SYS_futex, loc, FUTEX_WAIT|FUTEX_PRIVATE, val, 0) == -EAGAIN) + ACCOUNT(again, 1); + /* be optimistic and hope that the lock has been released */ + register unsigned des = val-1; + val -= contrib; + if (atomic_compare_exchange_strong_explicit(loc, &val, des, memory_order_acq_rel, memory_order_consume)) + goto FINISH; + if (!(val & lockbit)) goto BIT_UNSET; + } + /* The lock bit isn't set, try to acquire it. 
*/ + BIT_UNSET: + ACCOUNT(spin, spins); + ACCOUNT(slow, 1); + do { + a_spin(); + if (atomic_compare_exchange_strong_explicit(loc, &val, val|lockbit, memory_order_acq_rel, memory_order_consume)) + goto FINISH; + } while (!(val & lockbit)); + } + FINISH: +#ifdef BENCH + __impl_total += 1; + __impl_slow += slow; + __impl_futex += futex; + __impl_again += again; + __impl_spin += spin; +#endif + return; +} + +void __impl_mut_unlock_slow(_Atomic(unsigned)* loc) +{ + __syscall(SYS_futex, loc, FUTEX_WAKE|FUTEX_PRIVATE, 1); +} diff --git a/src/stdatomic/atomic_generic.c b/src/stdatomic/atomic_generic.c new file mode 100644 index 0000000..c51e4dc --- /dev/null +++ b/src/stdatomic/atomic_generic.c @@ -0,0 +1,242 @@ +#include +#include +#include "stdatomic-impl.h" +#include "atomic_generic.h" +#include "libc.h" + +#ifdef HASH_STAT +# include +# include +#endif + +/* This is compatible with musl's internal locks. */ +/* The lock itself must be lock-free, so in general the can only be an + atomic_flag if we know nothing else about the platform. */ + +typedef _Atomic(unsigned) __impl_lock; +void __impl_mut_lock_slow(_Atomic(unsigned)* loc); +void __impl_mut_unlock_slow(_Atomic(unsigned)* loc); + +static unsigned const contrib = ((UINT_MAX/2u)+2u); + +__attribute__((__always_inline__)) +static inline +void __impl_mut_lock(_Atomic(unsigned)* loc) +{ + if (!atomic_compare_exchange_strong_explicit(loc, (unsigned[1]){ 0 }, contrib, memory_order_acq_rel, memory_order_consume)) + __impl_mut_lock_slow(loc); +} + +__attribute__((__always_inline__)) +static inline +void __impl_mut_unlock(_Atomic(unsigned)* loc) +{ + if (contrib != atomic_fetch_sub_explicit(loc, contrib, memory_order_relaxed)) + __impl_mut_unlock_slow(loc); +} + +/* the size of this table has a trade off between the probability of + collisions (the bigger the table, the better) and the waste of + space (the smaller, the better). 
*/
+
+#ifndef HBIT
+# define HBIT 8
+#endif
+/* len is a power of two such that we can just mask out higher bits */
+enum { LEN = 1<<HBIT, };
+enum { ptrbit = sizeof(uintptr_t)*8, };
+
+static __impl_lock table[LEN];
+
+#ifdef HASH_STAT
+static _Atomic(size_t) draw[LEN];
+#endif
+
+__attribute__((__unused__))
+static
+unsigned __impl_hash(void volatile const* X) {
+	uintptr_t const len = LEN;
+	uintptr_t x = (uintptr_t)X;
+	/* fold all address bits into the table index */
+	if (ptrbit > 8*HBIT) x ^= (x / (len*len*len*len*len*len*len*len));
+	if (ptrbit > 4*HBIT) x ^= (x / (len*len*len*len));
+	if (ptrbit > 2*HBIT) x ^= (x / (len*len));
+	if (ptrbit > 1*HBIT) x ^= (x / len);
+	x %= len;
+#ifdef HASH_STAT
+	atomic_fetch_add_explicit(&draw[x], 1, memory_order_relaxed);
+#endif
+	return x;
+}
+
+__attribute__((__unused__))
+static
+unsigned __impl_jenkins_one_at_a_time_hash(void volatile const* k) {
+	union {
+		unsigned char b[sizeof k];
+		uintptr_t v;
+		void volatile const* k;
+	} key = { .k = k, };
+	uintptr_t i, x = 0;
+	for(i = 0; i < sizeof(uintptr_t); ++i) {
+		x += key.b[i];
+		x += (x << 10);
+		x ^= (x >> 6);
+	}
+	x += (x << 3);
+	x ^= (x >> 11);
+	x += (x << 15);
+	x %= LEN;
+#ifdef HASH_STAT
+	atomic_fetch_add_explicit(&draw[x], 1, memory_order_relaxed);
+#endif
+	return x;
+}
+
+__attribute__((__unused__))
+static
+uintptr_t __impl_mix(void volatile const* x) {
+	uintptr_t h = (uintptr_t)x;
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+	h %= LEN;
+#ifdef HASH_STAT
+	atomic_fetch_add_explicit(&draw[h], 1, memory_order_relaxed);
+#endif
+	return h;
+}
+
+__attribute__((__unused__))
+static
+uintptr_t __impl_8(void volatile const* x) {
+	uintptr_t h = (uintptr_t)x;
+	h ^= (h >> 8);
+	h %= LEN;
+#ifdef HASH_STAT
+	atomic_fetch_add_explicit(&draw[h], 1, memory_order_relaxed);
+#endif
+	return h;
+}
+
+#ifndef __ATOMIC_HASH
+# define __ATOMIC_HASH __impl_hash
+#endif
+#define hash __ATOMIC_HASH
+
+
+void __impl_load (size_t size, void volatile* ptr, void volatile* ret, int mo) {
+	unsigned pos = hash(ptr);
+	__impl_mut_lock(&table[pos]);
+	if (mo == memory_order_seq_cst)
+		atomic_thread_fence(memory_order_seq_cst);
+	memcpy((void*)ret, (void*)ptr, size);
+	__impl_mut_unlock(&table[pos]);
+}
+
+void __impl_store (size_t size, void volatile* ptr, void const volatile* val, int mo) {
+	unsigned pos = hash(ptr);
+	__impl_mut_lock(&table[pos]);
+	memcpy((void*)ptr, (void*)val, size);
+	if (mo == memory_order_seq_cst)
+		atomic_thread_fence(memory_order_seq_cst);
+	__impl_mut_unlock(&table[pos]);
+}
+
+static
+void atomic_exchange_restrict (size_t size, void volatile*__restrict__ ptr, void const volatile*__restrict__ val, void volatile*__restrict__ ret, int mo) {
+	unsigned pos = hash(ptr);
+	__impl_mut_lock(&table[pos]);
+	memcpy((void*)ret, (void*)ptr, size);
+	if (mo == memory_order_seq_cst)
+		atomic_thread_fence(memory_order_seq_cst);
+	memcpy((void*)ptr, (void*)val, size);
+	__impl_mut_unlock(&table[pos]);
+}
+
+void __impl_exchange (size_t size, void volatile*__restrict__ ptr, void const volatile* val, void volatile* ret, int mo) {
+	if (val == ret) {
+		unsigned char buffer[size];
+		atomic_exchange_restrict(size, ptr, val, buffer, mo);
+		memcpy((void*)ret, buffer, size);
+	} else {
+		atomic_exchange_restrict(size, ptr, val, ret, mo);
+	}
+}
+
+_Bool __impl_compare_exchange (size_t size, void volatile* ptr, void volatile* expected, void const volatile* desired, int mos, int mof) {
+	unsigned pos = hash(ptr);
+	__impl_mut_lock(&table[pos]);
+	_Bool ret = !memcmp((void*)ptr, (void*)expected, size);
+	if (ret) {
+		memcpy((void*)ptr, (void*)desired, size);
+		if (mos == memory_order_seq_cst)
+			atomic_thread_fence(memory_order_seq_cst);
+	} else {
+		if (mof == memory_order_seq_cst)
+			atomic_thread_fence(memory_order_seq_cst);
+		memcpy((void*)expected, (void*)ptr, size);
+	}
+	__impl_mut_unlock(&table[pos]); +
/* fprintf(stderr, "cas for %p (%zu) at pos %u, %s, exp %p, des %p\n", */ + /* ptr, size, pos, ret ? "suceeded" : "failed", */ + /* expected, desired); */ + return ret; +} + +/* To collect hash statistics about atomics, compile with + ``HASH_STAT'' */ +void __impl_print_stat(void) { +#ifdef HASH_STAT + size_t x1 = 0; + size_t x2 = 0; + size_t min = -1; + size_t max = 0; + size_t i; + for (i = 0; i < LEN; i++) { + size_t val = atomic_load(&draw[i]); + fprintf(stderr, "\t%zu", val); + if ((i % 8) == 7) fputc('\n', stderr); + x1 += val; + x2 += val*val; + if (val < min) min = val; + if (val > max) max = val; + } + fputc('\n', stderr); + double avg1 = (x1+0.0)/LEN; + double avg2 = (x2+0.0)/LEN; + double var = avg2 - avg1*avg1; + fprintf(stderr, "hash utilisation, %d positions, %zu draws: %zu < %e (+%e) < %zu\n", + LEN, x1, min, avg1, sqrt(var), max); +#endif +} + +/* For the four functions defined here, we need two entries in the + symbol table. One will be the final link name of the replacement + stub, something like __atomic_load, e.g. The other one is the + __impl prefixed name. It may eventually be used by the fixed-sized + stub functions, since these can't use the name that corresponds to + the builtin. + + The replacement to the final name is not done within compiling, + since the name clashes can only create conflicts for a C + compiler. Instead, we use an external tool (objcopy) that does the + renaming. + + We want these to be strong aliases, so they can't accidentally be + replaced. Therefore we can't use musl's weak_alias macro but create + one of our own. */ + +#define alias(X, Y) __attribute__((__alias__(#X))) __typeof__(X) Y + +alias(__impl_load, __impl_load_replace); +alias(__impl_store, __impl_store_replace); +alias(__impl_exchange, __impl_exchange_replace); +alias(__impl_compare_exchange, __impl_compare_exchange_replace); diff --git a/src/stdatomic/atomic_generic_1.c b/src/stdatomic/atomic_generic_1.c new file mode 100644 index 0000000..722657b --- /dev/null +++ b/src/stdatomic/atomic_generic_1.c @@ -0,0 +1,10 @@ + +#include "stdatomic-impl.h" + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 +INSTANTIATE_STUB_LF(1, uint8_t) +#else +INSTANTIATE_STUB_LC(1, uint8_t) +#endif + +INSTANTIATE_SYNC(1, uint8_t) diff --git a/src/stdatomic/atomic_generic_16.c b/src/stdatomic/atomic_generic_16.c new file mode 100644 index 0000000..ac5105f --- /dev/null +++ b/src/stdatomic/atomic_generic_16.c @@ -0,0 +1,15 @@ + +#include "stdatomic-impl.h" + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 +INSTANTIATE_STUB_LF(16, __impl_uint128_t) +INSTANTIATE_SYNC(16, __impl_uint128_t) +#else +INSTANTIATE_STUB_LCM(16, __impl_uint128_t) +INSTANTIATE_SYNCM(16, __impl_uint128_t) +# if __UINT128__ +INSTANTIATE_STUB_LCA(16, __impl_uint128_t) +INSTANTIATE_SYNCA(16, __impl_uint128_t) +# endif +#endif + diff --git a/src/stdatomic/atomic_generic_2.c b/src/stdatomic/atomic_generic_2.c new file mode 100644 index 0000000..a8244f2 --- /dev/null +++ b/src/stdatomic/atomic_generic_2.c @@ -0,0 +1,10 @@ + +#include "stdatomic-impl.h" + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 +INSTANTIATE_STUB_LF(2, uint16_t) +#else +INSTANTIATE_STUB_LC(2, uint16_t) +#endif + +INSTANTIATE_SYNC(2, uint16_t) diff --git a/src/stdatomic/atomic_generic_4.c b/src/stdatomic/atomic_generic_4.c new file mode 100644 index 0000000..7b1693f --- /dev/null +++ b/src/stdatomic/atomic_generic_4.c @@ -0,0 +1,10 @@ + +#include "stdatomic-impl.h" + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +INSTANTIATE_STUB_LF(4, uint32_t) +#else +INSTANTIATE_STUB_LC(4, 
uint32_t) +#endif + +INSTANTIATE_SYNC(4, uint32_t) diff --git a/src/stdatomic/atomic_generic_8.c b/src/stdatomic/atomic_generic_8.c new file mode 100644 index 0000000..d652497 --- /dev/null +++ b/src/stdatomic/atomic_generic_8.c @@ -0,0 +1,10 @@ + +#include "stdatomic-impl.h" + +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +INSTANTIATE_STUB_LF(8, uint64_t) +#else +INSTANTIATE_STUB_LC(8, uint64_t) +#endif + +INSTANTIATE_SYNC(8, uint64_t) diff --git a/src/stdatomic/redefine_syms.sh b/src/stdatomic/redefine_syms.sh new file mode 100755 index 0000000..31740c5 --- /dev/null +++ b/src/stdatomic/redefine_syms.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +objects=$(ls *.o) + +for obj in ${objects} ; do + objcopy --redefine-syms=redefine_syms.txt ${obj} tmp.o + mv tmp.o ${obj} + objcopy --redefine-syms=redefine_syms.txt ${obj%.o}.lo tmp.o + mv tmp.o ${obj%.o}.lo +done diff --git a/src/stdatomic/redefine_syms.txt b/src/stdatomic/redefine_syms.txt new file mode 100644 index 0000000..5197a49 --- /dev/null +++ b/src/stdatomic/redefine_syms.txt @@ -0,0 +1,144 @@ +__impl_load_1 __atomic_load_1 +__impl_store_1 __atomic_store_1 +__impl_exchange_1 __atomic_exchange_1 +__impl_compare_exchange_1 __atomic_compare_exchange_1 +__impl_fetch_add_1 __atomic_fetch_add_1 +__impl_fetch_sub_1 __atomic_fetch_sub_1 +__impl_fetch_and_1 __atomic_fetch_and_1 +__impl_fetch_xor_1 __atomic_fetch_xor_1 +__impl_fetch_nand_1 __atomic_fetch_nand_1 +__impl_fetch_or_1 __atomic_fetch_or_1 +__impl_add_fetch_1 __atomic_add_fetch_1 +__impl_sub_fetch_1 __atomic_sub_fetch_1 +__impl_and_fetch_1 __atomic_and_fetch_1 +__impl_xor_fetch_1 __atomic_xor_fetch_1 +__impl_nand_fetch_1 __atomic_nand_fetch_1 +__impl_or_fetch_1 __atomic_or_fetch_1 +__impl_load_2 __atomic_load_2 +__impl_store_2 __atomic_store_2 +__impl_exchange_2 __atomic_exchange_2 +__impl_compare_exchange_2 __atomic_compare_exchange_2 +__impl_fetch_add_2 __atomic_fetch_add_2 +__impl_fetch_sub_2 __atomic_fetch_sub_2 +__impl_fetch_and_2 __atomic_fetch_and_2 +__impl_fetch_xor_2 __atomic_fetch_xor_2 +__impl_fetch_nand_2 __atomic_fetch_nand_2 +__impl_fetch_or_2 __atomic_fetch_or_2 +__impl_add_fetch_2 __atomic_add_fetch_2 +__impl_sub_fetch_2 __atomic_sub_fetch_2 +__impl_and_fetch_2 __atomic_and_fetch_2 +__impl_xor_fetch_2 __atomic_xor_fetch_2 +__impl_nand_fetch_2 __atomic_nand_fetch_2 +__impl_or_fetch_2 __atomic_or_fetch_2 +__impl_load_4 __atomic_load_4 +__impl_store_4 __atomic_store_4 +__impl_exchange_4 __atomic_exchange_4 +__impl_compare_exchange_4 __atomic_compare_exchange_4 +__impl_fetch_add_4 __atomic_fetch_add_4 +__impl_fetch_sub_4 __atomic_fetch_sub_4 +__impl_fetch_and_4 __atomic_fetch_and_4 +__impl_fetch_xor_4 __atomic_fetch_xor_4 +__impl_fetch_nand_4 __atomic_fetch_nand_4 +__impl_fetch_or_4 __atomic_fetch_or_4 +__impl_add_fetch_4 __atomic_add_fetch_4 +__impl_sub_fetch_4 __atomic_sub_fetch_4 +__impl_and_fetch_4 __atomic_and_fetch_4 +__impl_xor_fetch_4 __atomic_xor_fetch_4 +__impl_nand_fetch_4 __atomic_nand_fetch_4 +__impl_or_fetch_4 __atomic_or_fetch_4 +__impl_load_8 __atomic_load_8 +__impl_store_8 __atomic_store_8 +__impl_exchange_8 __atomic_exchange_8 +__impl_compare_exchange_8 __atomic_compare_exchange_8 +__impl_fetch_add_8 __atomic_fetch_add_8 +__impl_fetch_sub_8 __atomic_fetch_sub_8 +__impl_fetch_and_8 __atomic_fetch_and_8 +__impl_fetch_xor_8 __atomic_fetch_xor_8 +__impl_fetch_nand_8 __atomic_fetch_nand_8 +__impl_fetch_or_8 __atomic_fetch_or_8 +__impl_add_fetch_8 __atomic_add_fetch_8 +__impl_sub_fetch_8 __atomic_sub_fetch_8 +__impl_and_fetch_8 __atomic_and_fetch_8 +__impl_xor_fetch_8 
__atomic_xor_fetch_8 +__impl_nand_fetch_8 __atomic_nand_fetch_8 +__impl_or_fetch_8 __atomic_or_fetch_8 +__impl_load_16 __atomic_load_16 +__impl_store_16 __atomic_store_16 +__impl_exchange_16 __atomic_exchange_16 +__impl_compare_exchange_16 __atomic_compare_exchange_16 +__impl_fetch_add_16 __atomic_fetch_add_16 +__impl_fetch_sub_16 __atomic_fetch_sub_16 +__impl_fetch_and_16 __atomic_fetch_and_16 +__impl_fetch_xor_16 __atomic_fetch_xor_16 +__impl_fetch_nand_16 __atomic_fetch_nand_16 +__impl_fetch_or_16 __atomic_fetch_or_16 +__impl_add_fetch_16 __atomic_add_fetch_16 +__impl_sub_fetch_16 __atomic_sub_fetch_16 +__impl_and_fetch_16 __atomic_and_fetch_16 +__impl_xor_fetch_16 __atomic_xor_fetch_16 +__impl_nand_fetch_16 __atomic_nand_fetch_16 +__impl_or_fetch_16 __atomic_or_fetch_16 +__impl_bool_compare_and_swap_1 __sync_bool_compare_and_swap_1 +__impl_val_compare_and_swap_1 __sync_val_compare_and_swap_1 +__impl_fetch_and_add_1 __sync_fetch_and_add_1 +__impl_fetch_and_sub_1 __sync_fetch_and_sub_1 +__impl_fetch_and_and_1 __sync_fetch_and_and_1 +__impl_fetch_and_xor_1 __sync_fetch_and_xor_1 +__impl_fetch_and_or_1 __sync_fetch_and_or_1 +__impl_add_and_fetch_1 __sync_add_and_fetch_1 +__impl_sub_and_fetch_1 __sync_sub_and_fetch_1 +__impl_and_and_fetch_1 __sync_and_and_fetch_1 +__impl_xor_and_fetch_1 __sync_xor_and_fetch_1 +__impl_or_and_fetch_1 __sync_or_and_fetch_1 +__impl_bool_compare_and_swap_2 __sync_bool_compare_and_swap_2 +__impl_val_compare_and_swap_2 __sync_val_compare_and_swap_2 +__impl_fetch_and_add_2 __sync_fetch_and_add_2 +__impl_fetch_and_sub_2 __sync_fetch_and_sub_2 +__impl_fetch_and_and_2 __sync_fetch_and_and_2 +__impl_fetch_and_xor_2 __sync_fetch_and_xor_2 +__impl_fetch_and_or_2 __sync_fetch_and_or_2 +__impl_add_and_fetch_2 __sync_add_and_fetch_2 +__impl_sub_and_fetch_2 __sync_sub_and_fetch_2 +__impl_and_and_fetch_2 __sync_and_and_fetch_2 +__impl_xor_and_fetch_2 __sync_xor_and_fetch_2 +__impl_or_and_fetch_2 __sync_or_and_fetch_2 +__impl_bool_compare_and_swap_4 __sync_bool_compare_and_swap_4 +__impl_val_compare_and_swap_4 __sync_val_compare_and_swap_4 +__impl_fetch_and_add_4 __sync_fetch_and_add_4 +__impl_fetch_and_sub_4 __sync_fetch_and_sub_4 +__impl_fetch_and_and_4 __sync_fetch_and_and_4 +__impl_fetch_and_xor_4 __sync_fetch_and_xor_4 +__impl_fetch_and_or_4 __sync_fetch_and_or_4 +__impl_add_and_fetch_4 __sync_add_and_fetch_4 +__impl_sub_and_fetch_4 __sync_sub_and_fetch_4 +__impl_and_and_fetch_4 __sync_and_and_fetch_4 +__impl_xor_and_fetch_4 __sync_xor_and_fetch_4 +__impl_or_and_fetch_4 __sync_or_and_fetch_4 +__impl_bool_compare_and_swap_8 __sync_bool_compare_and_swap_8 +__impl_val_compare_and_swap_8 __sync_val_compare_and_swap_8 +__impl_fetch_and_add_8 __sync_fetch_and_add_8 +__impl_fetch_and_sub_8 __sync_fetch_and_sub_8 +__impl_fetch_and_and_8 __sync_fetch_and_and_8 +__impl_fetch_and_xor_8 __sync_fetch_and_xor_8 +__impl_fetch_and_or_8 __sync_fetch_and_or_8 +__impl_add_and_fetch_8 __sync_add_and_fetch_8 +__impl_sub_and_fetch_8 __sync_sub_and_fetch_8 +__impl_and_and_fetch_8 __sync_and_and_fetch_8 +__impl_xor_and_fetch_8 __sync_xor_and_fetch_8 +__impl_or_and_fetch_8 __sync_or_and_fetch_8 +__impl_bool_compare_and_swap_16 __sync_bool_compare_and_swap_16 +__impl_val_compare_and_swap_16 __sync_val_compare_and_swap_16 +__impl_fetch_and_add_16 __sync_fetch_and_add_16 +__impl_fetch_and_sub_16 __sync_fetch_and_sub_16 +__impl_fetch_and_and_16 __sync_fetch_and_and_16 +__impl_fetch_and_xor_16 __sync_fetch_and_xor_16 +__impl_fetch_and_or_16 __sync_fetch_and_or_16 +__impl_add_and_fetch_16 
__sync_add_and_fetch_16 +__impl_sub_and_fetch_16 __sync_sub_and_fetch_16 +__impl_and_and_fetch_16 __sync_and_and_fetch_16 +__impl_xor_and_fetch_16 __sync_xor_and_fetch_16 +__impl_or_and_fetch_16 __sync_or_and_fetch_16 +__impl_load_replace __atomic_load +__impl_store_replace __atomic_store +__impl_exchange_replace __atomic_exchange +__impl_compare_exchange_replace __atomic_compare_exchange diff --git a/src/thread/__lock1.c b/src/thread/__lock1.c new file mode 100644 index 0000000..1a7123d --- /dev/null +++ b/src/thread/__lock1.c @@ -0,0 +1,79 @@ +#include "pthread_impl.h" + +#if INT_MIN == -INT_MAX +# error "this implementation supposes that INT_MIN has only the HO bit set and all others 0" +#endif + +int volatile __cnt_slow = 0; +int volatile __cnt_futex = 0; +int volatile __cnt_again = 0; + +#ifndef NBENCH +# define ACCOUNT(X) a_inc(&(X)) +#else +# define ACCOUNT(X) do { } while(0) +#endif + +/* A lock is an int that is interpreted specially. The HO bit tells if + the lock is already taken by some thread or not. The other bits are + a counter for the number of threads that are inside the critical section. + + If loc is non-negative it holds the number of threads that entered + the critical section. + + If loc is negative (the lock is taken) *loc - INT_MIN is the number + of treads in the critical section, including the lock holder. Or in + other words the number is *loc where the sign bit is zeroed. */ + +void __lock1(int volatile* loc) +{ + if (libc.threads_minus_1) { + int spins; + /* fast path */ + /* -INT_MAX is binary 10000…00001 */ + int val = a_cas(loc, 0, -INT_MAX); + if (!val) return; + val = 1+a_fetch_add(loc, 1); + /* The lock acquisition loop. This has been designed such that the + only possible change that is done inside that loop is setting + the lock bit. This has a double objective. First all atomic + operations are expensive and doing a pair of ++ and -- inside + the loop would just waste memory bandwidth. Then, less changes + to the count, means that other threads that are inside this + same loop are less perturbed. */ + for (;;) { + /* The lock bit isn't yet set. */ + NOLOCK: + while (val >= 0) { + /* This just sets the sign bit without causing overflow. */ + int other = (val-INT_MAX)-1; + other = a_cas(loc, val, other); + if (other == val) return; + else val = other; + } + /* The lock bit is set by someone else. */ + ACCOUNT(__cnt_slow); + for (spins = 0; spins < 100; ++spins) { + a_spin(); + val = *loc; + if (val >= 0) goto NOLOCK; + } + while (val < 0) { + ACCOUNT(__cnt_futex); + if (__syscall(SYS_futex, loc, FUTEX_WAIT|FUTEX_PRIVATE, val, 0) == -EAGAIN) + ACCOUNT(__cnt_again); + val = *loc; + } + } + } +} + +void __unlock1(int volatile* loc) +{ + if (*loc < 0) { + /** *loc must be negative when unlock1 is called, so INT_MAX can + always be added without overflow. */ + if (-INT_MAX != a_fetch_add(loc, INT_MAX)) + __syscall(SYS_futex, loc, FUTEX_WAKE|FUTEX_PRIVATE, 1); + } +}
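
A short illustration of the emulation described in atomic_constants.h and stdatomic-impl.h: when __ATOMIC_FORCE_SYNC is in effect, _Atomic(T) is spelled as a one-element volatile array so that such objects cannot be used with plain operators, and __aret() wraps macro results in a conditional expression so they cannot be used as lvalues. This is a minimal sketch with renamed macros; fake_Atomic and fake_aret are illustrative, not part of the patch.

#define fake_Atomic(T) __typeof__(T volatile[1])  /* same shape as the fallback _Atomic(T) */
#define fake_aret(V)   (1 ? (V) : (V))            /* same trick as __aret */

static fake_Atomic(int) a;          /* a one-element volatile array */

static int sketch(void) {
	/* a = 1;                     error: arrays are not assignable, so the
	                              atomic_* macros cannot be bypassed with `=` */
	/* int* p = &fake_aret(a[0]); error: a conditional expression is an rvalue,
	                              so the result of the macros has no address  */
	return fake_aret(a[0]);       /* reading through the macro result is fine */
}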
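As the header comment in stdatomic-impl.h explains, only the "functional" interface is guaranteed to work with all three realizations; operators on atomic objects are off limits when the __sync fallback is used. A minimal sketch of portable client code under that assumption (the names counter and bump_and_read are illustrative):

#include <stdatomic.h>

static _Atomic(unsigned) counter = ATOMIC_VAR_INIT(0);

unsigned bump_and_read(void) {
	/* portable across the __c11_atomic, __atomic and __sync realizations */
	atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);
	return atomic_load_explicit(&counter, memory_order_seq_cst);
	/* not portable to the __sync fallback: `counter += 1` or `++counter` */
}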
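The INSTANTIATE_STUB_LCA stubs in atomic_stub.h all emulate a read-modify-write with the same compare-exchange loop over the locked __impl_compare_exchange. A self-contained sketch of that pattern for fetch-and-add, expressed with the standard C11 call instead of the internal one (fetch_add_via_cas is illustrative):

#include <stdatomic.h>

/* Returns the previous value, like atomic_fetch_add. After every failed CAS,
   E has been refreshed with the currently stored value, so the desired value
   R is recomputed from it before retrying. */
static unsigned fetch_add_via_cas(_Atomic(unsigned)* X, unsigned V, memory_order MO) {
	unsigned E = 0;                       /* first guess for the stored value */
	unsigned R = V;                       /* desired value for that guess: E + V */
	memory_order mof = (MO == memory_order_relaxed)
		? memory_order_relaxed
		: memory_order_consume;       /* failure order, as in the stubs */
	while (!atomic_compare_exchange_strong_explicit(X, &E, R, MO, mof)) {
		R = E + V;
	}
	return E;
}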
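The comment at the end of atomic_generic.c explains why the externally visible names are produced by objcopy rather than by the C compiler. From the user's side the effect looks roughly like this (a sketch; whether the compiler emits the out-of-line call depends on the target's lock-free support):

#include <stdatomic.h>

_Atomic(long long) big;   /* suppose 8-byte operations are not lock-free here */

long long next(void) {
	/* gcc/clang lower this to a call to the library function
	   __atomic_fetch_add_8 when no inline instruction sequence exists. */
	return atomic_fetch_add(&big, 1);
}

/* The library compiles its replacement under the name __impl_fetch_add_8
   (instantiated in atomic_generic_8.c); redefine_syms.sh then runs objcopy
   with redefine_syms.txt, which contains the pair
       __impl_fetch_add_8 __atomic_fetch_add_8
   so the very same object file ends up providing the symbol that the
   compiler-generated call refers to. */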
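The comment at the top of src/thread/__lock1.c defines the lock word encoding. Spelled out for a 32-bit int (a worked example; the concrete bit patterns are only for illustration):

/* *loc == 0               unlocked, no thread interested                     */
/* *loc == k      (k > 0)  unlocked, k threads currently racing for the bit   */
/* *loc == -INT_MAX        locked, only the holder inside (0x80000001)        */
/* *loc == -INT_MAX + k    locked, holder plus k other threads (0x80000001+k) */

/* __lock1 fast path:  a_cas(loc, 0, -INT_MAX)    0x00000000 -> 0x80000001    */
/* __unlock1:          a_fetch_add(loc, INT_MAX)  (-INT_MAX + k) + INT_MAX == k,
   i.e. the sign bit is cleared and the holder's contribution removed in one
   atomic addition; if k > 0, one of the remaining threads is woken via futex. */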