/* * Proof-of-concept bitslice implementation of the MD5 compression function. * * Written by Solar Designer in 1997-1998, revised in * 2007,2011, and placed in the public domain. There's absolutely no warranty. * Although this is not required formally, due credit will be appreciated if * you re-use this code or concepts. * * Possible compiler invocations (tested on Linux/x86-64): * * # No vectors (e.g., native 64-bit registers): * gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops * * # 128-bit vectors with SSE2: * PATH=~/gcc-4.1.0/bin:$PATH gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops -msse2 -DVECTOR * * The -msse2 option is not required on x86-64 (it is implied), and it needs * to be replaced with -maltivec for AltiVec. You may also add -march=... to * match your CPU model. * * Systems with smaller L1 instruction caches may need -O3 replaced with -O2, * or -fno-inline-functions may be passed explicitly. */ #include #include #include #define MAYBE_INLINE1 __attribute__((always_inline)) //#define MAYBE_INLINE1 #define MAYBE_INLINE2 __attribute__((always_inline)) //#define MAYBE_INLINE2 //#define MAYBE_INLINE3 __attribute__((always_inline)) #define MAYBE_INLINE3 #define MAYBE_INLINE4 __attribute__((always_inline)) //#define MAYBE_INLINE4 typedef unsigned int scalar; typedef signed int element; #ifdef VECTOR typedef element vector __attribute__ ((vector_size (16))); #else typedef unsigned long vector; #endif #define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 static MAYBE_INLINE1 void add32(x, y, z) vector *x, *y, *z; { int i; vector a, b, c, p; i = 32; c ^= c; // c = 0; do { a = *x++; b = *y++; *z++ = (p = a ^ b) ^ c; c = (p & c) | (a & b); } while (--i); } static MAYBE_INLINE1 void add32c(c, x, y, z) scalar c; vector *x, *y, *z; { int i; vector a, b, c1, c2, p, q; i = 32; c1 ^= c1; c2 ^= c2; // c1 = c2 = 0; do { a = *x++; b = *y++; if (c & 1) { *z++ = ~(a ^ b) ^ c1 ^ c2; c2 = (a & b & (p = c1 | c2)) | (c1 & c2 & (q = a | b)); c1 = p | q; } else { *z++ = (q = (p = a ^ b) ^ c1) ^ c2; c1 = (p & c1) | (a & b); c2 &= q; } c >>= 1; } while (--i); } static MAYBE_INLINE1 void add32r(n, x, y, z) int n; vector *x, *y, *z; { int i; vector *yr, a, b, c, p; i = n; n = 32 - n; yr = y + n; c ^= c; // c = 0; do { a = *x++; b = *yr++; *z++ = (p = a ^ b) ^ c; c = (p & c) | (a & b); } while (--i); do { a = *x++; b = *y++; *z++ = (p = a ^ b) ^ c; c = (p & c) | (a & b); } while (--n); } static MAYBE_INLINE2 void F(x, y, z, f) vector *x, *y, *z, *f; { int i; vector a; i = 32; do { a = *z++; *f++ = a ^ (*x++ & (*y++ ^ a)); } while (--i); } static MAYBE_INLINE2 void H(x, y, z, f) vector *x, *y, *z, *f; { int i; i = 32; do { *f++ = *x++ ^ *y++ ^ *z++; } while (--i); } static MAYBE_INLINE2 void I(x, y, z, f) vector *x, *y, *z, *f; { int i; i = 32; do { *f++ = *y++ ^ (*x++ | ~*z++); } while (--i); } static void const32(x, y) scalar x; vector *y; { int i; vector zero, ones; zero ^= zero; ones = ~zero; i = 32; do { if (x & 1) *y++ = ones; else *y++ = zero; x >>= 1; } while (--i); } static MAYBE_INLINE4 void FF(a, b, c, d, x, s, ac) vector *a, *b, *c, *d, *x; int s; scalar ac; { int i, sr = 32 - s; vector tmp[32]; vector c1, c2, c3, c4; c1 ^= c1; c2 ^= c2; c3 ^= c3; for (i = 0; i < sr; i++) { scalar cond = ac & 1; ac >>= 1; if (cond) { vector di = d[i]; vector t = di ^ (b[i] & (c[i] ^ di)); vector xi = x[i]; vector ai = a[i]; vector p, q, r; vector u = ~(xi ^ t) ^ c1 ^ c2; c2 = (xi & t & (p = c1 | c2)) | (c1 & c2 & (q = xi | t)); c1 = p | q; tmp[i] = (r = ai ^ u) ^ c3; c3 = (r & c3) | (ai & u); } else { vector di = d[i]; vector t = di ^ (b[i] & (c[i] ^ di)); vector xi = x[i]; vector ai = a[i]; vector p, q, r; vector u = (q = (p = xi ^ t) ^ c1) ^ c2; c1 = (p & c1) | (xi & t); c2 &= q; tmp[i] = (r = ai ^ u) ^ c3; c3 = (r & c3) | (ai & u); } } c4 ^= c4; for (; i < 32; i++) { scalar cond = ac & 1; ac >>= 1; if (cond) { vector di = d[i]; vector t = di ^ (b[i] & (c[i] ^ di)); vector xi = x[i]; vector ai = a[i]; vector p, q, r; vector u = ~(xi ^ t) ^ c1 ^ c2; vector v; c2 = (xi & t & (p = c1 | c2)) | (c1 & c2 & (q = xi | t)); c1 = p | q; v = (r = ai ^ u) ^ c3; c3 = (r & c3) | (ai & u); { vector bir = b[i - sr]; vector p; a[i - sr] = (p = bir ^ v) ^ c4; c4 = (p & c4) | (bir & v); } } else { vector di = d[i]; vector t = di ^ (b[i] & (c[i] ^ di)); vector xi = x[i]; vector ai = a[i]; vector p, q, r; vector u = (q = (p = xi ^ t) ^ c1) ^ c2; vector v; c1 = (p & c1) | (xi & t); c2 &= q; v = (r = ai ^ u) ^ c3; c3 = (r & c3) | (ai & u); { vector bir = b[i - sr]; vector p; a[i - sr] = (p = bir ^ v) ^ c4; c4 = (p & c4) | (bir & v); } } } for (i = s; i < 32; i++) { vector bi = b[i]; vector tir = tmp[i - s]; vector p; a[i] = (p = bi ^ tir) ^ c4; c4 = (p & c4) | (bi & tir); } } static MAYBE_INLINE3 void FF_S11(a, b, c, d, x, ac) vector *a, *b, *c, *d, *x; scalar ac; { FF(a, b, c, d, x, S11, ac); } static MAYBE_INLINE3 void FF_S12(a, b, c, d, x, ac) vector *a, *b, *c, *d, *x; scalar ac; { FF(a, b, c, d, x, S12, ac); } static MAYBE_INLINE3 void FF_S13(a, b, c, d, x, ac) vector *a, *b, *c, *d, *x; scalar ac; { FF(a, b, c, d, x, S13, ac); } static MAYBE_INLINE3 void FF_S14(a, b, c, d, x, ac) vector *a, *b, *c, *d, *x; scalar ac; { FF(a, b, c, d, x, S14, ac); } static MAYBE_INLINE3 void GG(a, b, c, d, x, s, ac) vector *a, *b, *c, *d, *x; int s; scalar ac; { vector tmp[32]; F(d, b, c, tmp); add32c(ac, x, tmp, tmp); add32(a, tmp, tmp); add32r(s, b, tmp, a); } static MAYBE_INLINE3 void HH(a, b, c, d, x, s, ac) vector *a, *b, *c, *d, *x; int s; scalar ac; { vector tmp[32]; H(b, c, d, tmp); add32c(ac, x, tmp, tmp); add32(a, tmp, tmp); add32r(s, b, tmp, a); } static MAYBE_INLINE3 void II(a, b, c, d, x, s, ac) vector *a, *b, *c, *d, *x; int s; scalar ac; { vector tmp[32]; I(b, c, d, tmp); add32c(ac, x, tmp, tmp); add32(a, tmp, tmp); add32r(s, b, tmp, a); } static union { vector v[4][32]; element e[4][32][sizeof(vector) / sizeof(element)]; } state; static vector init[4][32]; static void md5_init(void) { const32(0x67452301, init[0]); const32(0xefcdab89, init[1]); const32(0x98badcfe, init[2]); const32(0x10325476, init[3]); } static void md5_xform(x) vector x[16][32]; { vector a[32], b[32], c[32], d[32]; memcpy(a, init[0], sizeof(a)); memcpy(b, init[1], sizeof(b)); memcpy(c, init[2], sizeof(c)); memcpy(d, init[3], sizeof(d)); /* Round 1 */ FF_S11(a, b, c, d, x[0], 0xd76aa478); /* 1 */ FF_S12(d, a, b, c, x[1], 0xe8c7b756); /* 2 */ FF_S13(c, d, a, b, x[2], 0x242070db); /* 3 */ FF_S14(b, c, d, a, x[3], 0xc1bdceee); /* 4 */ FF_S11(a, b, c, d, x[4], 0xf57c0faf); /* 5 */ FF_S12(d, a, b, c, x[5], 0x4787c62a); /* 6 */ FF_S13(c, d, a, b, x[6], 0xa8304613); /* 7 */ FF_S14(b, c, d, a, x[7], 0xfd469501); /* 8 */ FF_S11(a, b, c, d, x[8], 0x698098d8); /* 9 */ FF_S12(d, a, b, c, x[9], 0x8b44f7af); /* 10 */ FF_S13(c, d, a, b, x[10], 0xffff5bb1); /* 11 */ FF_S14(b, c, d, a, x[11], 0x895cd7be); /* 12 */ FF_S11(a, b, c, d, x[12], 0x6b901122); /* 13 */ FF_S12(d, a, b, c, x[13], 0xfd987193); /* 14 */ FF_S13(c, d, a, b, x[14], 0xa679438e); /* 15 */ FF_S14(b, c, d, a, x[15], 0x49b40821); /* 16 */ /* Round 2 */ GG(a, b, c, d, x[1], S21, 0xf61e2562); /* 17 */ GG(d, a, b, c, x[6], S22, 0xc040b340); /* 18 */ GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG(b, c, d, a, x[0], S24, 0xe9b6c7aa); /* 20 */ GG(a, b, c, d, x[5], S21, 0xd62f105d); /* 21 */ GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG(b, c, d, a, x[4], S24, 0xe7d3fbc8); /* 24 */ GG(a, b, c, d, x[9], S21, 0x21e1cde6); /* 25 */ GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG(c, d, a, b, x[3], S23, 0xf4d50d87); /* 27 */ GG(b, c, d, a, x[8], S24, 0x455a14ed); /* 28 */ GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG(d, a, b, c, x[2], S22, 0xfcefa3f8); /* 30 */ GG(c, d, a, b, x[7], S23, 0x676f02d9); /* 31 */ GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ HH(a, b, c, d, x[5], S31, 0xfffa3942); /* 33 */ HH(d, a, b, c, x[8], S32, 0x8771f681); /* 34 */ HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH(a, b, c, d, x[1], S31, 0xa4beea44); /* 37 */ HH(d, a, b, c, x[4], S32, 0x4bdecfa9); /* 38 */ HH(c, d, a, b, x[7], S33, 0xf6bb4b60); /* 39 */ HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH(d, a, b, c, x[0], S32, 0xeaa127fa); /* 42 */ HH(c, d, a, b, x[3], S33, 0xd4ef3085); /* 43 */ HH(b, c, d, a, x[6], S34, 0x4881d05); /* 44 */ HH(a, b, c, d, x[9], S31, 0xd9d4d039); /* 45 */ HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH(b, c, d, a, x[2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ II(a, b, c, d, x[0], S41, 0xf4292244); /* 49 */ II(d, a, b, c, x[7], S42, 0x432aff97); /* 50 */ II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II(b, c, d, a, x[5], S44, 0xfc93a039); /* 52 */ II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II(d, a, b, c, x[3], S42, 0x8f0ccc92); /* 54 */ II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II(b, c, d, a, x[1], S44, 0x85845dd1); /* 56 */ II(a, b, c, d, x[8], S41, 0x6fa87e4f); /* 57 */ II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II(c, d, a, b, x[6], S43, 0xa3014314); /* 59 */ II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II(a, b, c, d, x[4], S41, 0xf7537e82); /* 61 */ II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II(c, d, a, b, x[2], S43, 0x2ad7d2bb); /* 63 */ II(b, c, d, a, x[9], S44, 0xeb86d391); /* 64 */ add32(a, init[0], state.v[0]); add32(b, init[1], state.v[1]); add32(c, init[2], state.v[2]); add32(d, init[3], state.v[3]); } int main(void) { vector x[16][32]; scalar y[4]; int i, j; printf("vector size = %u bits\n", (unsigned int)sizeof(vector) * 8); md5_init(); for (i = 0; i < 16; i++) const32(0xf4f4f4f4, x[i]); md5_xform(x); memset(y, 0, sizeof(y)); for (i = 0; i < 4; i++) for (j = 0; j < 32; j++) y[i] |= (state.e[i][j][0] & 1) << j; printf("%x %x %x %x\n", y[0], y[1], y[2], y[3]); i = 10000000 / (sizeof(vector) * 8); do md5_xform(x); while (--i); return 0; }