Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 12 Nov 2007 07:00:21 +0300
From: Solar Designer <solar@...nwall.com>
To: john-users@...ts.openwall.com
Subject: bitslice MD5 (was: bitslice implementation of ORACLE hash cracking)

I wrote:
> > As to SHA-1 (as well as MD4 and MD5, for that matter), bitslice
> > implementations are possible,

On Sun, Nov 11, 2007 at 07:38:02PM +0000, Larry Bonner wrote:
> have you got or know of any code that demonstrates it? to play around with.

I've attached my proof-of-concept bitslice implementation of the MD5
compression function.  Very recent versions of gcc are able to
meaningfully compile this into SSE2 code (and likely AltiVec as well,
but I have not tested that).  The performance is not impressive, but
there's lots of room for improvement.

On Athlon64 3200+ (2.0 GHz) running Linux/x86-64 (Owl-current), I get:

amd!solar:~/md5slice$ gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops
amd!solar:~/md5slice$ time ./md5slice
vector size = 64 bits
c09c4c1f 21876746 18aed2 70b452f0

real    0m0.463s
user    0m0.460s
sys     0m0.010s

amd!solar:~/md5slice$ PATH=~/gcc-4.1.0/bin:$PATH gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops -DVECTOR
amd!solar:~/md5slice$ time ./md5slice
vector size = 128 bits
c09c4c1f 21876746 18aed2 70b452f0

real    0m0.388s
user    0m0.390s
sys     0m0.000s

The latter corresponds to roughly 2.5 million computations of the MD5
compression function per second, which is 3 times lower than what the
double-MD5 code currently in JtR achieves on the same system:

amd!solar:~$ john -te -fo=md5
Benchmarking: FreeBSD MD5 [32/64 X2]... DONE
Raw:    7458 c/s real, 7458 c/s virtual

(these numbers need to be multiplied by 1000 to get the number of MD5
compression function computations).

Feel free to experiment with this on Core 2, on AltiVec, with more
recent gcc, and to improve the implementation.

-- 
Alexander Peslyak <solar at openwall.com>
GPG key ID: 5B341F15  fp: B3FB 63F4 D7A3 BCCC 6F6E  FC55 A2FC 027C 5B34 1F15
http://www.openwall.com - bringing security into open computing environments

/*
 * Proof-of-concept bitslice implementation of the MD5 compression function.
 *
 * Written by Solar Designer <solar at openwall.com> in 1997-1998, revised in
 * 2007, and placed in the public domain.  There's absolutely no warranty.
 * Although this is not required formally, due credit will be appreciated if
 * you re-use this code or concepts.
 *
 * Possible compiler invocations (tested on Linux/x86-64):
 *
 * # No vectors (e.g., native 64-bit registers):
 * gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops
 *
 * # 128-bit vectors with SSE2:
 * PATH=~/gcc-4.1.0/bin:$PATH gcc md5slice.c -o md5slice -Wall -s -O3 -fomit-frame-pointer -funroll-loops -msse2 -DVECTOR
 *
 * The -msse2 option is not required on x86-64 (it is implied), and it needs
 * to be replaced with -maltivec for AltiVec.  You may also add -march=... to
 * match your CPU model.
 *
 * Systems with smaller L1 instruction caches may need -O3 replaced with -O2,
 * or -fno-inline-functions may be passed explicitly.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

typedef unsigned int scalar;
typedef signed int element;
#ifdef VECTOR
typedef element vector __attribute__ ((vector_size (16)));
#else
typedef unsigned long vector;
#endif

#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

static void add32(x, y, z)
vector *x, *y, *z;
{
	int i;
	vector a, b, c, p;

	i = 32;
	c ^= c; // c = 0;
	do {
		a = *x++;
		b = *y++;
		*z++ = (p = a ^ b) ^ c;
		c = (p & c) | (a & b);
	} while (--i);
}

static void add32c(c, x, y, z)
scalar c;
vector *x, *y, *z;
{
	int i;
	vector a, b, c1, c2, p, q;

	i = 32;
	c1 ^= c1; c2 ^= c2; // c1 = c2 = 0;
	do {
		a = *x++;
		b = *y++;
		if (c & 1) {
			*z++ = ~(a ^ b) ^ c1 ^ c2;
			c2 = (a & b & (p = c1 | c2)) | (c1 & c2 & (q = a | b));
			c1 = p | q;
		} else {
			*z++ = (q = (p = a ^ b) ^ c1) ^ c2;
			c1 = (p & c1) | (a & b);
			c2 &= q;
		}
		c >>= 1;
	} while (--i);
}

static void F(x, y, z, f)
vector *x, *y, *z, *f;
{
	int i;
	vector a;

	i = 32;
	do {
		a = *z++;
		*f++ = a ^ (*x++ & (*y++ ^ a));
	} while (--i);
}

static void G(x, y, z, f)
vector *x, *y, *z, *f;
{
	F(z, x, y, f);
}

static void H(x, y, z, f)
vector *x, *y, *z, *f;
{
	int i;

	i = 32;
	do {
		*f++ = *x++ ^ *y++ ^ *z++;
	} while (--i);
}

static void I(x, y, z, f)
vector *x, *y, *z, *f;
{
	int i;

	i = 32;
	do {
		*f++ = *y++ ^ (*x++ | ~*z++);
	} while (--i);
}

static void rol32(n, x, y)
int n;
vector *x, *y;
{
	int i, j;

	j = 32 - (i = n);
	do {
		*y++ = x[j++];
	} while (--i);

	i = 32 - n;
	do {
		*y++ = *x++;
	} while (--i);
}

static void const32(x, y)
scalar x;
vector *y;
{
	int i;
	vector zero, ones;

	zero ^= zero;
	ones = ~zero;

	i = 32;
	do {
		if (x & 1)
			*y++ = ones;
		else
			*y++ = zero;
		x >>= 1;
	} while (--i);
}

static void XX(f, a, b, c, d, x, s, ac)
void (*f) (vector *x, vector *y, vector *z, vector *f);
vector *a, *b, *c, *d, *x;
int s;
scalar ac;
{
	vector tmp[32];

	f(b, c, d, tmp);
	add32c(ac, x, tmp, tmp);
	add32(a, tmp, tmp);
	rol32(s, tmp, a);
	add32(b, a, a);
}

#define FF(a, b, c, d, x, s, ac) XX(F, a, b, c, d, x, s, ac)
#define GG(a, b, c, d, x, s, ac) XX(G, a, b, c, d, x, s, ac)
#define HH(a, b, c, d, x, s, ac) XX(H, a, b, c, d, x, s, ac)
#define II(a, b, c, d, x, s, ac) XX(I, a, b, c, d, x, s, ac)

static union {
	vector v[4][32];
	element e[4][32][sizeof(vector) / sizeof(element)];
} state;
static vector init[4][32];

static void md5_init(void)
{
	const32(0x67452301, init[0]);
	const32(0xefcdab89, init[1]);
	const32(0x98badcfe, init[2]);
	const32(0x10325476, init[3]);
}

static void md5_xform(x)
vector x[16][32];
{
	vector a[32], b[32], c[32], d[32];

	memcpy(a, init[0], sizeof(a));
	memcpy(b, init[1], sizeof(b));
	memcpy(c, init[2], sizeof(c));
	memcpy(d, init[3], sizeof(d));

	/* Round 1 */
	FF(a, b, c, d, x[0], S11, 0xd76aa478);	/* 1 */
	FF(d, a, b, c, x[1], S12, 0xe8c7b756);	/* 2 */
	FF(c, d, a, b, x[2], S13, 0x242070db);	/* 3 */
	FF(b, c, d, a, x[3], S14, 0xc1bdceee);	/* 4 */
	FF(a, b, c, d, x[4], S11, 0xf57c0faf);	/* 5 */
	FF(d, a, b, c, x[5], S12, 0x4787c62a);	/* 6 */
	FF(c, d, a, b, x[6], S13, 0xa8304613);	/* 7 */
	FF(b, c, d, a, x[7], S14, 0xfd469501);	/* 8 */
	FF(a, b, c, d, x[8], S11, 0x698098d8);	/* 9 */
	FF(d, a, b, c, x[9], S12, 0x8b44f7af);	/* 10 */
	FF(c, d, a, b, x[10], S13, 0xffff5bb1);	/* 11 */
	FF(b, c, d, a, x[11], S14, 0x895cd7be);	/* 12 */
	FF(a, b, c, d, x[12], S11, 0x6b901122);	/* 13 */
	FF(d, a, b, c, x[13], S12, 0xfd987193);	/* 14 */
	FF(c, d, a, b, x[14], S13, 0xa679438e);	/* 15 */
	FF(b, c, d, a, x[15], S14, 0x49b40821);	/* 16 */

	/* Round 2 */
	GG(a, b, c, d, x[1], S21, 0xf61e2562);	/* 17 */
	GG(d, a, b, c, x[6], S22, 0xc040b340);	/* 18 */
	GG(c, d, a, b, x[11], S23, 0x265e5a51);	/* 19 */
	GG(b, c, d, a, x[0], S24, 0xe9b6c7aa);	/* 20 */
	GG(a, b, c, d, x[5], S21, 0xd62f105d);	/* 21 */
	GG(d, a, b, c, x[10], S22, 0x2441453);	/* 22 */
	GG(c, d, a, b, x[15], S23, 0xd8a1e681);	/* 23 */
	GG(b, c, d, a, x[4], S24, 0xe7d3fbc8);	/* 24 */
	GG(a, b, c, d, x[9], S21, 0x21e1cde6);	/* 25 */
	GG(d, a, b, c, x[14], S22, 0xc33707d6);	/* 26 */
	GG(c, d, a, b, x[3], S23, 0xf4d50d87);	/* 27 */
	GG(b, c, d, a, x[8], S24, 0x455a14ed);	/* 28 */
	GG(a, b, c, d, x[13], S21, 0xa9e3e905);	/* 29 */
	GG(d, a, b, c, x[2], S22, 0xfcefa3f8);	/* 30 */
	GG(c, d, a, b, x[7], S23, 0x676f02d9);	/* 31 */
	GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);	/* 32 */

	/* Round 3 */
	HH(a, b, c, d, x[5], S31, 0xfffa3942);	/* 33 */
	HH(d, a, b, c, x[8], S32, 0x8771f681);	/* 34 */
	HH(c, d, a, b, x[11], S33, 0x6d9d6122);	/* 35 */
	HH(b, c, d, a, x[14], S34, 0xfde5380c);	/* 36 */
	HH(a, b, c, d, x[1], S31, 0xa4beea44);	/* 37 */
	HH(d, a, b, c, x[4], S32, 0x4bdecfa9);	/* 38 */
	HH(c, d, a, b, x[7], S33, 0xf6bb4b60);	/* 39 */
	HH(b, c, d, a, x[10], S34, 0xbebfbc70);	/* 40 */
	HH(a, b, c, d, x[13], S31, 0x289b7ec6);	/* 41 */
	HH(d, a, b, c, x[0], S32, 0xeaa127fa);	/* 42 */
	HH(c, d, a, b, x[3], S33, 0xd4ef3085);	/* 43 */
	HH(b, c, d, a, x[6], S34, 0x4881d05);	/* 44 */
	HH(a, b, c, d, x[9], S31, 0xd9d4d039);	/* 45 */
	HH(d, a, b, c, x[12], S32, 0xe6db99e5);	/* 46 */
	HH(c, d, a, b, x[15], S33, 0x1fa27cf8);	/* 47 */
	HH(b, c, d, a, x[2], S34, 0xc4ac5665);	/* 48 */

	/* Round 4 */
	II(a, b, c, d, x[0], S41, 0xf4292244);	/* 49 */
	II(d, a, b, c, x[7], S42, 0x432aff97);	/* 50 */
	II(c, d, a, b, x[14], S43, 0xab9423a7);	/* 51 */
	II(b, c, d, a, x[5], S44, 0xfc93a039);	/* 52 */
	II(a, b, c, d, x[12], S41, 0x655b59c3);	/* 53 */
	II(d, a, b, c, x[3], S42, 0x8f0ccc92);	/* 54 */
	II(c, d, a, b, x[10], S43, 0xffeff47d);	/* 55 */
	II(b, c, d, a, x[1], S44, 0x85845dd1);	/* 56 */
	II(a, b, c, d, x[8], S41, 0x6fa87e4f);	/* 57 */
	II(d, a, b, c, x[15], S42, 0xfe2ce6e0);	/* 58 */
	II(c, d, a, b, x[6], S43, 0xa3014314);	/* 59 */
	II(b, c, d, a, x[13], S44, 0x4e0811a1);	/* 60 */
	II(a, b, c, d, x[4], S41, 0xf7537e82);	/* 61 */
	II(d, a, b, c, x[11], S42, 0xbd3af235);	/* 62 */
	II(c, d, a, b, x[2], S43, 0x2ad7d2bb);	/* 63 */
	II(b, c, d, a, x[9], S44, 0xeb86d391);	/* 64 */

	add32(a, init[0], state.v[0]);
	add32(b, init[1], state.v[1]);
	add32(c, init[2], state.v[2]);
	add32(d, init[3], state.v[3]);
}

int main(void)
{
	vector x[16][32];
	scalar y[4];
	int i, j;

	printf("vector size = %u bits\n", (unsigned int)sizeof(vector) * 8);

	md5_init();

	for (i = 0; i < 16; i++)
		const32(0xf4f4f4f4, x[i]);
	md5_xform(x);

	memset(y, 0, sizeof(y));
	for (i = 0; i < 4; i++)
		for (j = 0; j < 32; j++)
			y[i] |= (state.e[i][j][0] & 1) << j;

	printf("%x %x %x %x\n", y[0], y[1], y[2], y[3]);

	i = 1000000 / (sizeof(vector) * 8);
	do
		md5_xform(x);
	while (--i);

	return 0;
}


-- 
To unsubscribe, e-mail john-users-unsubscribe@...ts.openwall.com and reply
to the automated confirmation request that will be sent to you.

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ