Use the XOP _mm_cmov_si128 intrinsic in two SHA-1 macros when __XOP__ is defined.

R3 (src/rawSHA1_ng_fmt.c) and SHA1_H (src/sse-intrinsics.c) each gain an
#ifdef __XOP__ variant built from _mm_cmov_si128, _mm_andnot_si128 and
_mm_xor_si128; the existing and/or formulation is kept in the #else branch.
The last line of the non-XOP SHA1_H also drops its trailing backslash, so
the macro no longer continues onto the line that follows it.

NOTE(review): indentation inside the hunks is a best-effort reconstruction.
If a hunk is rejected, retry with `git apply --ignore-whitespace` or
`patch -l`.

diff --git a/src/rawSHA1_ng_fmt.c b/src/rawSHA1_ng_fmt.c
index 17dd484..93e3c09 100644
--- a/src/rawSHA1_ng_fmt.c
+++ b/src/rawSHA1_ng_fmt.c
@@ -98,6 +98,15 @@
     E = _mm_add_epi32(E, _mm_roti_epi32(A, 5)); \
 } while (false)
 
+#ifdef __XOP__
+#define R3(W, A, B, C, D, E) do { \
+    E = _mm_add_epi32(E, K); \
+    E = _mm_add_epi32(E, _mm_xor_si128(_mm_cmov_si128(D, B, C), _mm_andnot_si128(D, B))); \
+    E = _mm_add_epi32(E, W); \
+    B = _mm_roti_epi32(B, 30); \
+    E = _mm_add_epi32(E, _mm_roti_epi32(A, 5)); \
+} while (false)
+#else
 #define R3(W, A, B, C, D, E) do { \
     E = _mm_add_epi32(E, K); \
     E = _mm_add_epi32(E, _mm_or_si128(_mm_and_si128(_mm_or_si128(B, D), C), _mm_and_si128(B, D))); \
@@ -105,6 +114,7 @@
     B = _mm_roti_epi32(B, 30); \
     E = _mm_add_epi32(E, _mm_roti_epi32(A, 5)); \
 } while (false)
+#endif
 
 #define _MM_TRANSPOSE4_EPI32(R0, R1, R2, R3) do { \
     __m128i T0, T1, T2, T3; \
diff --git a/src/sse-intrinsics.c b/src/sse-intrinsics.c
index 3880f6d..7aee2eb 100644
--- a/src/sse-intrinsics.c
+++ b/src/sse-intrinsics.c
@@ -620,11 +620,18 @@ void SSEmd4body(__m128i* data, unsigned int * out, int init)
     SHA1_PARA_DO(i) tmp[i] = _mm_xor_si128((y[i]),(z[i])); \
     SHA1_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]),(x[i]));
 
+#ifdef __XOP__
+#define SHA1_H(x,y,z) \
+    SHA1_PARA_DO(i) tmp[i] = _mm_cmov_si128((x[i]),(y[i]),(z[i])); \
+    SHA1_PARA_DO(i) tmp2[i] = _mm_andnot_si128((x[i]),(y[i])); \
+    SHA1_PARA_DO(i) tmp[i] = _mm_xor_si128((tmp[i]),(tmp2[i]));
+#else
 #define SHA1_H(x,y,z) \
     SHA1_PARA_DO(i) tmp[i] = _mm_and_si128((x[i]),(y[i])); \
     SHA1_PARA_DO(i) tmp2[i] = _mm_or_si128((x[i]),(y[i])); \
     SHA1_PARA_DO(i) tmp2[i] = _mm_and_si128((tmp2[i]),(z[i])); \
-    SHA1_PARA_DO(i) tmp[i] = _mm_or_si128((tmp[i]),(tmp2[i])); \
+    SHA1_PARA_DO(i) tmp[i] = _mm_or_si128((tmp[i]),(tmp2[i]));
+#endif
 
 #define SHA1_I(x,y,z) SHA1_G(x,y,z)
 