Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Tue, 20 Jun 2017 22:35:28 +0200
From: Jens Gustedt <Jens.Gustedt@...ia.fr>
To: musl@...ts.openwall.com
Subject: [PATCH 4/8] determine the existence of private futexes at the first
 thread creation

The current strategie to deal with kernels that don't implement private
futexes is to

 - test if the call works with FUTEX_PRIVATE
 - fallback to a call without if it doesn't

This forces an overhead for both sides, Linux'es with and without private
futexes. For those with, it adds a superflouous branch instruction to all
calls. For those without, it add the whole call overhead of a syscall.

The idea of this patch is simple. We determine dynamically but only once
when we switch from 1 thread to multiple threads if private futexes are
implemented. This reduces the overhead to
 - one system call in total
 - one load of a global and an OR of the flags at each futex call

In addition this system call will be as short as possible, because it
futexes on a local variable where nobody is to woken, and all this is in
a context (thread creation) that is clearly dominated by a lot of other
things.

This patch also clears an inconsistency in the code that had
FUTEX_PRIVATE written out as 128 in many places, which made it really
hard to digest and to identify what was going on.
---
 src/internal/pthread_impl.h         | 11 +++++------
 src/thread/__timedwait.c            |  3 +--
 src/thread/__wait.c                 |  5 ++---
 src/thread/pthread_barrier_wait.c   |  3 +--
 src/thread/pthread_cond_timedwait.c |  3 +--
 src/thread/pthread_create.c         | 10 +++++++++-
 6 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index e1dd4421..02929e07 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -126,18 +126,17 @@ void __vm_unlock(void);
 int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
 int __timedwait_cp(volatile int *, int, clockid_t, const struct timespec *, int);
 void __wait(volatile int *, volatile int *, int, int);
+extern int __futex_private;
 static inline void __wake(volatile void *addr, int cnt, int priv)
 {
-	if (priv) priv = 128;
+	if (priv) priv = __futex_private;
 	if (cnt<0) cnt = INT_MAX;
-	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt) != -ENOSYS ||
-	__syscall(SYS_futex, addr, FUTEX_WAKE, cnt);
+	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt);
 }
 static inline void __futexwait(volatile void *addr, int val, int priv)
 {
-	if (priv) priv = 128;
-	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val) != -ENOSYS ||
-	__syscall(SYS_futex, addr, FUTEX_WAIT, val);
+	if (priv) priv = __futex_private;
+	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val);
 }
 
 void __acquire_ptc(void);
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index 13d8465a..7f0f03e6 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -14,7 +14,7 @@ int __timedwait_cp(volatile int *addr, int val,
 	int r;
 	struct timespec to, *top=0;
 
-	if (priv) priv = 128;
+	if (priv) priv = __futex_private;
 
 	if (at) {
 		if (at->tv_nsec >= 1000000000UL) return EINVAL;
@@ -29,7 +29,6 @@ int __timedwait_cp(volatile int *addr, int val,
 	}
 
 	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
-	if (r == ENOSYS) r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT, val, top);
 	if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0;
 
 	return r;
diff --git a/src/thread/__wait.c b/src/thread/__wait.c
index dc33c1a3..a3391b79 100644
--- a/src/thread/__wait.c
+++ b/src/thread/__wait.c
@@ -3,15 +3,14 @@
 void __wait(volatile int *addr, volatile int *waiters, int val, int priv)
 {
 	int spins=100;
-	if (priv) priv = FUTEX_PRIVATE;
+	if (priv) priv = __futex_private;
 	while (spins-- && (!waiters || !*waiters)) {
 		if (*addr==val) a_spin();
 		else return;
 	}
 	if (waiters) a_inc(waiters);
 	while (*addr==val) {
-		__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0) != -ENOSYS
-		|| __syscall(SYS_futex, addr, FUTEX_WAIT, val, 0);
+		__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0);
 	}
 	if (waiters) a_dec(waiters);
 }
diff --git a/src/thread/pthread_barrier_wait.c b/src/thread/pthread_barrier_wait.c
index 06b83db9..a89051e5 100644
--- a/src/thread/pthread_barrier_wait.c
+++ b/src/thread/pthread_barrier_wait.c
@@ -84,8 +84,7 @@ int pthread_barrier_wait(pthread_barrier_t *b)
 			a_spin();
 		a_inc(&inst->finished);
 		while (inst->finished == 1)
-			__syscall(SYS_futex,&inst->finished,FUTEX_WAIT|128,1,0) != -ENOSYS
-			|| __syscall(SYS_futex,&inst->finished,FUTEX_WAIT,1,0);
+			__syscall(SYS_futex,&inst->finished,FUTEX_WAIT|__futex_private,1,0);
 		return PTHREAD_BARRIER_SERIAL_THREAD;
 	}
 
diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
index 3526ecfb..6d92ea88 100644
--- a/src/thread/pthread_cond_timedwait.c
+++ b/src/thread/pthread_cond_timedwait.c
@@ -54,8 +54,7 @@ static inline void unlock_requeue(volatile int *l, volatile int *r, int w)
 {
 	a_store(l, 0);
 	if (w) __wake(l, 1, 1);
-	else __syscall(SYS_futex, l, FUTEX_REQUEUE|128, 0, 1, r) != -ENOSYS
-		|| __syscall(SYS_futex, l, FUTEX_REQUEUE, 0, 1, r);
+	else __syscall(SYS_futex, l, FUTEX_REQUEUE|__futex_private, 0, 1, r);
 }
 
 enum {
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index ca5c6b90..26945022 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -10,6 +10,8 @@ void *__mmap(void *, size_t, int, int, int, off_t);
 int __munmap(void *, size_t);
 int __mprotect(void *, size_t, int);
 
+int __futex_private = 0;
+
 static void dummy_0()
 {
 }
@@ -277,7 +279,13 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	new->unblock_cancel = self->cancel;
 	new->CANARY = self->CANARY;
 
-	a_inc(&libc.threads_minus_1);
+	if (!a_fetch_add(&libc.threads_minus_1, 1)) {
+          // As long as we only have one thread, test if this supports
+          // private futexes.
+          __lock_t dummy = { 0 };
+          if (__syscall(SYS_futex, dummy.lc, FUTEX_WAKE|FUTEX_PRIVATE, 0) != -ENOSYS)
+		__futex_private = FUTEX_PRIVATE;
+        }
 	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
 
 	__release_ptc();

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.