commit 2cc0400512c53704230c06838efd8870e91fcaf3 Author: Aleksey Cherepanov Date: Fri Sep 14 17:04:40 2012 +0400 experimental speed up of md5 intrinsics for linux-x86-64-native diff --git a/src/sse-intrinsics.c b/src/sse-intrinsics.c index c09d6ad..74aae16 100644 --- a/src/sse-intrinsics.c +++ b/src/sse-intrinsics.c @@ -43,40 +43,31 @@ #ifdef __XOP__ #define MD5_F(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_cmov_si128((y[i]),(z[i]),(x[i])); + _mm_cmov_si128((y[i]),(z[i]),(x[i])) #else #define MD5_F(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((y[i]),(z[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_and_si128((tmp[i]),(x[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]),(z[i])); + _mm_xor_si128(_mm_and_si128(_mm_xor_si128((y[i]),(z[i])),(x[i])),(z[i])) #endif #ifdef __XOP__ #define MD5_G(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_cmov_si128((x[i]),(y[i]),(z[i])); + _mm_cmov_si128((x[i]),(y[i]),(z[i])) #else #define MD5_G(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((y[i]),(x[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_and_si128((tmp[i]),(z[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]), (y[i]) ); + _mm_xor_si128(_mm_and_si128(_mm_xor_si128((y[i]),(x[i])),(z[i])), (y[i])) #endif #define MD5_H(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((y[i]),(z[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]),(x[i])); + _mm_xor_si128(_mm_xor_si128((y[i]),(z[i])),(x[i])) #define MD5_I(x,y,z) \ - MD5_PARA_DO(i) tmp[i] = _mm_andnot_si128((z[i]), mask); \ - MD5_PARA_DO(i) tmp[i] = _mm_or_si128((tmp[i]),(x[i])); \ - MD5_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]),(y[i])); + _mm_xor_si128(_mm_or_si128(_mm_andnot_si128((z[i]), mask),(x[i])),(y[i])) #define MD5_STEP(f, a, b, c, d, x, t, s) \ MD5_PARA_DO(i) a[i] = _mm_add_epi32( a[i], _mm_set_epi32(t,t,t,t) ); \ - f((b),(c),(d)) \ - MD5_PARA_DO(i) a[i] = _mm_add_epi32( a[i], tmp[i] ); \ - MD5_PARA_DO(i) a[i] = _mm_add_epi32( a[i], data[i*16+x] ); \ - MD5_PARA_DO(i) a[i] = _mm_roti_epi32( a[i], (s) ); \ - MD5_PARA_DO(i) a[i] = _mm_add_epi32( a[i], b[i] ); + MD5_PARA_DO(i) a[i] = _mm_add_epi32( a[i], f((b),(c),(d)) ); \ + MD5_PARA_DO(i) a[i] = _mm_add_epi32( \ + _mm_roti_epi32( _mm_add_epi32(a[i], data[i*16+x]), (s) ), b[i] ); unsigned int debug = 0; @@ -91,7 +82,6 @@ void SSEmd5body(__m128i* data, unsigned int * out, int init) __m128i b[MD5_SSE_PARA]; __m128i c[MD5_SSE_PARA]; __m128i d[MD5_SSE_PARA]; - __m128i tmp[MD5_SSE_PARA]; __m128i mask; unsigned int i;