Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sat, 24 Dec 2011 11:32:27 +0100
From: magnum <john.magnum@...hmail.com>
To: john-dev@...ts.openwall.com
Subject: Re: MD5 intrinsics compile-time condition

On 12/24/2011 03:34 AM, magnum wrote:
> On 12/23/2011 04:49 PM, Solar Designer wrote:
>> Apparently, the condition that enables the use of intrinsics is not the
>> same for md5 vs. dynamic_27 and 28, and apparently it is non-optimal for
>> md5 for certain gcc version(s) (I guess Apple's gcc 4.2).
...
> The current code picks PARA 3 (12x) for any gcc other than 4.5. I
> recently tweaked those tests after empirical tests with 4.4, 4.5 and 4.6
> (and clang and icc) - the versions that were available in my Ubuntu repo
> at the time. I suppose PARA 1 (4x) would be the safe choice for any
> untested version and it should always be faster than disabling SSE.

Here is a patch that falls back to 4x rather than disabling intrinsics
for MD5. On some builds, it will probably boost MD5 but ruin some other
formats until we add more tests for individual gcc versions < 4.4. And
it's quite possible we never actually have to go below 8x.

BTW, note that 64i and sse2i builds are not affected by this issue at all.

magnum

From c316c5a87ffa84dcbaadefe2ed06334867f06235 Mon Sep 17 00:00:00 2001
From: magnum <john.magnum@...hmail.com>
Date: Sat, 24 Dec 2011 11:20:43 +0100
Subject: [PATCH 29/29] MD5_std vs intrinsics / arch.h PARA fixes. Never
 disable SSE for MD5, revert to 4x instead.

---
 src/MD5_std.h        |    6 ------
 src/sse-intrinsics.c |    2 --
 src/x86-64.h         |    5 ++++-
 src/x86-ssei.h       |    5 ++++-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/MD5_std.h b/src/MD5_std.h
index b1609c4..21f323c 100644
--- a/src/MD5_std.h
+++ b/src/MD5_std.h
@@ -58,12 +58,6 @@ typedef struct {
 } MD5_data;
 #endif
 
-#if !defined(MD5_in_sse_intrinsics) && defined(__GNUC__) && \
-    (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) && \
-    !defined(USING_ICC_S_FILE)
-#undef MD5_SSE_PARA
-#endif
-
 #ifdef MD5_SSE_PARA
 # ifndef MMX_COEF
 #  define MMX_COEF			4
diff --git a/src/sse-intrinsics.c b/src/sse-intrinsics.c
index ed2a050..003b1ee 100644
--- a/src/sse-intrinsics.c
+++ b/src/sse-intrinsics.c
@@ -8,9 +8,7 @@
 #include <emmintrin.h>
 #include "memory.h"
 #include "md5.h"
-#define MD5_in_sse_intrinsics
 #include "MD5_std.h"
-#undef MD5_in_sse_intrinsics
 
 #ifndef MMX_COEF
 #define MMX_COEF 4
diff --git a/src/x86-64.h b/src/x86-64.h
index 2e9bd09..9da141f 100644
--- a/src/x86-64.h
+++ b/src/x86-64.h
@@ -186,9 +186,12 @@
 #elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 5)
 #define MD5_SSE_PARA			2
 #define MD5_N_STR			"8x"
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ == 4 || __GNUC_MINOR__ > 5)))
 #define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
+#elif defined(__GNUC__)
+#define MD5_SSE_PARA			1
+#define MD5_N_STR			"4x"
 #else
 #define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
diff --git a/src/x86-ssei.h b/src/x86-ssei.h
index 4afd775..6e7822b 100644
--- a/src/x86-ssei.h
+++ b/src/x86-ssei.h
@@ -145,9 +145,12 @@
 #elif defined(__clang__)
 #define MD5_SSE_PARA			4
 #define MD5_N_STR			"16x"
-#elif defined(__GNUC__) || defined (_MSC_VER)
+#elif defined (_MSC_VER) || (defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)))
 #define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
+#elif defined(__GNUC__)
+#define MD5_SSE_PARA			1
+#define MD5_N_STR			"4x"
 #else
 #define MD5_SSE_PARA			2
 #define MD5_N_STR			"8x"
-- 
1.7.5.4


Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ