Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sun, 15 May 2005 14:47:31 +0200
From: Simon Marechal <simon@...quise.net>
To: john-users@...ts.openwall.com
Subject: Re: LANMAN and NT Hash ?s...basic

Solar Designer wrote:
> I am not sure of that.  It could be more complicated than that
> (registry settings involved, etc.)  I'd rather have someone more
> knowledgeable in Windows comment on this.  Simon?

I think so, the lmhash should be aad3b435b51404eeaad3b435b51404ee.
A quick google reveals from
http://www.microsoft.com/technet/community/chats/trans/windowsnet/wnet_092104.mspx
Host: Jesper (Microsoft)
Q: Can you comment on disabling LM Hashing? Is there any other way than
creating passwords of 14 characters or more?
A: You can disable LM Hashes by using the settings documented in KB
299656. If you can't turn the setting on globally then using passwords
longer than 14 characters works well. You can also use certain Unicode
characters in the password. For instance, if you use characters between
0127 and 0156 you will not get an LM hash. You will break things if you
turn off LM hashes though. Windows cluster services and RTC both use
them. To work around that, turn on NtlmMinClientSec to at least 0x80010.
You will also break Win9x, but I think of that as a security benefit.




If you do a lot of NT hash cracking, you might be interested in the attached
files. They are an MMX/SSE2 implementation of MD4 applied to NT hashes.

It's a really ugly hack I wrote, so it might not work as expected. AFAIK it
works quite well, and is way faster:

simon@...p:~/projets/john/run$ ./john -test -format:NT
Benchmarking: NT MD4 [TridgeMD4]... DONE
Raw:    683249 c/s real, 741051 c/s virtual

simon@...p:~/projets/john/run$ ./john -test -format:NTmmx
Benchmarking: NT MD4 MMX(MMX 2x) [bartavelle]... DONE
Raw:    4027K c/s real, 4525K c/s virtual

simon@...p:~/projets/john/run$ ./john -test -format:NTmmx
Benchmarking: NT MD4 MMX (SSE2 4x) [bartavelle]... DONE
Raw:    6139K c/s real, 6615K c/s virtual

It does not implement the full md4update stuff, so it only works with a
buffer of 64 chars (32 unicoded chars for the password).

I have the same stuff for md5 and sha1 if anybody is interested.

/*
 * NTLM patch for john version 0.3
 *
 * (C) 2001 Olle Segerdahl <olle@....se>
 *
 * license: GPL <http://www.gnu.org/licenses/gpl.html>
 *
 * This file is based on code from John the Ripper,
 * Copyright (c) 1996-99 by Solar Designer
 *
 * performance enhancements by bartavelle@...decon.com
 */

#include <string.h>

#include "arch.h"
#include "memory.h"
#include "common.h"
#include "formats.h"
#include "md4.h"

#ifndef uchar
#define uchar unsigned char
#endif

#define FORMAT_LABEL			"ntmmx"
#define FORMAT_NAME			"NT MD4 MMX"

#define BENCHMARK_COMMENT		MMX_TYPE
#define BENCHMARK_LENGTH		-1

/*
 * The SIMD code hashes exactly one 64-byte MD4 block per key.  Each
 * character takes two bytes (UTF-16LE), the 0x80 padding marker needs at
 * least one more byte and the last eight bytes hold the message length, so
 * at most (64 - 8 - 1) / 2 = 27 characters fit.  Longer keys would collide
 * with the length field and hash incorrectly.
 */
#define PLAINTEXT_LENGTH		27
/* "$NT$" prefix (4 characters) followed by 32 hex digits. */
#define CIPHERTEXT_LENGTH		36


/*
 * Self-test vectors referenced by the format descriptor: each entry pairs a
 * "$NT$"-prefixed, 32-hex-digit hash with a plaintext (presumably the
 * password that produces it -- the checking code lives outside this file).
 * The table is terminated by a NULL entry.
 */
static struct fmt_tests tests[] = {
	{"$NT$b7e4b9022cd45f275334bbdb83bb5be5", "John the Ripper"},
	{"$NT$8846f7eaee8fb117ad06bdd830b7586c", "password"},
	{"$NT$0cb6948805f797bf2a82807973b89537", "test"},
	{"$NT$31d6cfe0d16ae931b73c59d7e0c089c0", ""},
	{NULL}
};

/* Algorithm label placed in the format descriptor. */
#define ALGORITHM_NAME			"bartavelle"

/* Size in bytes of the decoded hash returned by get_binary(); no salt. */
#define BINARY_SIZE			16
#define SALT_SIZE			0


/* Both bounds equal MMX_COEF: one key per SIMD lane for each crypt_all(). */
#define MIN_KEYS_PER_CRYPT		MMX_COEF
#define MAX_KEYS_PER_CRYPT		MMX_COEF

/*
 * Index, in 16-bit units, of character i of key idx inside saved_plain.
 * Characters are stored in pairs (one 32-bit word), and the words of the
 * MMX_COEF keys are interleaved: the pair index (i & 0xfffe) is scaled by
 * MMX_COEF, idx << 1 selects the key's word within that group and (i & 1)
 * selects the character within the word.
 */
#define GETPOS(i,idx)	( ((i)&0xfffe)*MMX_COEF + ((i)&1) + ((idx)<<1) )

//uchar saved_plain[PLAINTEXT_LENGTH + 1];

/*
 * Candidate keys, one 64-byte block per key, interleaved word by word (see
 * GETPOS).  Filled by set_key() and passed as input to the SIMD MD4 routine.
 */
static unsigned char saved_plain[64 * MMX_COEF] __attribute__ ((aligned(32)));
/* Not referenced by the code in this file. */
static unsigned char tmpbuf[64 * MMX_COEF] __attribute__ ((aligned(32)));
/* Digests written by the SIMD MD4 routine, interleaved like the input. */
static unsigned char output[BINARY_SIZE*MMX_COEF + 1] __attribute__ ((aligned(32)));
/*
 * Buffer returned by get_key().  It needs room for the longest key that
 * set_key() may store (at most 32 characters) plus the terminating NUL; the
 * previous size of 32 let get_key() write one byte past the end.
 */
static unsigned char out[32 + 1];
/* Packed per-key lengths; see set_key() for the encoding. */
static unsigned long total_len;
/* Not referenced by the code in this file. */
static int global_watch = 0;

/*
 * Returns 1 when ciphertext has the form "$NT$" followed by characters that
 * atoi16 accepts, with a total length of exactly CIPHERTEXT_LENGTH, and 0
 * otherwise.  (atoi16 presumably maps non-hex characters to 0x7F -- it is
 * defined outside this file.)
 */
static int valid(char *ciphertext)
{
	char *end;

	if (strncmp(ciphertext, "$NT$", 4))
		return 0;

	/* Skip over every character atoi16 does not flag with 0x7F. */
	end = ciphertext + 4;
	while (atoi16[ARCH_INDEX(*end)] != 0x7F)
		end++;

	/* The run must stop at the NUL and have the expected length. */
	return *end == '\0' && end - ciphertext == CIPHERTEXT_LENGTH;
}

/*
 * Decodes the hex digits that follow the 4-character "$NT$" prefix into
 * BINARY_SIZE bytes.  The result lives in a static buffer that is
 * overwritten by the next call.
 */
static void *get_binary(char *ciphertext)
{
	static uchar binary[BINARY_SIZE];
	char *hex = ciphertext + 4;
	int n;

	/* Two hex digits per output byte, high nibble first. */
	for (n = 0; n < BINARY_SIZE; n++, hex += 2)
		binary[n] = (atoi16[ARCH_INDEX(hex[0])] << 4) |
		    atoi16[ARCH_INDEX(hex[1])];

	return binary;
}

static int binary_hash_0(void *binary)
{
	return ((uchar *)binary)[0] & 0x0F;
}

static int binary_hash_1(void *binary)
{
	return ((uchar *)binary)[0];
}

static int binary_hash_2(void *binary)
{
	return (((uchar *)binary)[0] << 4) + (((uchar *)binary)[1] & 0x0F);
}

/*
 * 4-bit hash of the computed digest for key `index`, matching
 * binary_hash_0().  The first digest word of each key sits at byte offset
 * index*4 of output.
 */
static int get_hash_0(int index)
{
	unsigned char *lane = &output[index*4];

	return lane[0] & 0x0F;
}

/*
 * 8-bit hash of the computed digest for key `index`, matching
 * binary_hash_1(): the first digest byte of that key.
 */
static int get_hash_1(int index)
{
	unsigned char *lane = &output[index*4];

	return lane[0];
}

/*
 * 12-bit hash of the computed digest for key `index`, matching
 * binary_hash_2(): first digest byte shifted left by four, plus the low
 * nibble of the second.
 */
static int get_hash_2(int index)
{
	unsigned char *lane = &output[index*4];
	int high = lane[0] << 4;
	int low = lane[1] & 0x0F;

	return high + low;
}

/*
 * Hashes all keys stored in saved_plain into output with the SIMD MD4
 * routine that matches MMX_COEF.  The count argument is not used; the
 * packed lengths in total_len tell the routine how long each key is.
 */
static void crypt_all(int count)
{
#if (MMX_COEF == 2)
	mdfourmmx(output, saved_plain, total_len);
#elif (MMX_COEF == 4)
	mdfoursse2(output, saved_plain, total_len);
#endif
}

/*
 * Quick reject test over every lane at once.  For each of the
 * BINARY_SIZE/4 digest words, the corresponding word of `binary` must equal
 * that word in at least one lane; otherwise 0 is returned.  A return of 1
 * does not prove that a single lane matches on every word, only that none
 * of the words rules all lanes out.  The count argument is not used.
 */
static int cmp_all(void *binary, int count)
{
	int word;

	for (word = 0; word < (BINARY_SIZE/4); word++) {
		unsigned long wanted = ((unsigned long *)binary)[word];
		unsigned long *lanes = &((unsigned long *)output)[word*MMX_COEF];

		if (wanted == lanes[0])
			continue;
#if (MMX_COEF > 1 )
		if (wanted == lanes[1])
			continue;
#endif
#if (MMX_COEF > 3)
		if (wanted == lanes[2])
			continue;
		if (wanted == lanes[3])
			continue;
#endif
		/* No lane carries this word. */
		return 0;
	}

	return 1;
}

/*
 * Compares `binary` with the digest computed for key `index`.  Word w of
 * that digest is stored at word position w*MMX_COEF + index of output.
 * Returns 1 when all BINARY_SIZE/4 words match, 0 otherwise.
 */
static int cmp_one(void * binary, int index)
{
	unsigned long *wanted = (unsigned long *)binary;
	unsigned long *lane = (unsigned long *)output + index;
	int word;

	for (word = 0; word < (BINARY_SIZE/4); word++) {
		if (wanted[word] != lane[word*MMX_COEF])
			return 0;
	}

	return 1;
}

/*
 * Final confirmation step of the comparison chain.  This format performs
 * no further check here and always reports a match.
 */
static int cmp_exact(char *source, int index)
{
	(void)source;
	(void)index;

	return 1;
}

/* This format has no salt (SALT_SIZE is 0), so there is nothing to store. */
static void set_salt(void *salt)
{
	(void)salt;
}

/*
 * Stores candidate password `key` for SIMD lane `index`.
 *
 * Each character is widened to a 16-bit little-endian unit and written into
 * saved_plain at the interleaved position given by GETPOS, followed by the
 * MD4 0x80 padding marker.  Setting key 0 starts a new batch: it clears
 * saved_plain and total_len, so key 0 must be set before the other lanes.
 *
 * total_len packs one field per lane, each holding the key length in bytes
 * (2 * characters); the fields are 32/MMX_COEF bits apart.  The SIMD MD4
 * routine unpacks them to build the message-length words.
 */
static void set_key(char *key, int index)
{
	/*
	 * One 64-byte MD4 block holds the message, the 0x80 marker and the
	 * 8-byte length, so at most (64 - 8 - 1) / 2 = 27 two-byte characters
	 * fit.  The previous limit of 32 wrote the marker past the end of the
	 * key's block and let characters overlap the length word.
	 */
	enum { MAX_KEY_CHARS = 27 };
	int len;
	int i;

	if (index == 0) {
		total_len = 0;
		memset(saved_plain, 0, 64*MMX_COEF);
	}

	len = strlen(key);
	if (len > MAX_KEY_CHARS)
		len = MAX_KEY_CHARS;

	total_len += len << (1 + ((32/MMX_COEF) * index));

	/*
	 * Go through unsigned char so that 8-bit characters are zero-extended;
	 * a plain (possibly signed) char would become 0xFFxx.
	 */
	for (i = 0; i < len; i++)
		((unsigned short *)saved_plain)[GETPOS(i, index)] =
		    (unsigned char)key[i];
	((unsigned short *)saved_plain)[GETPOS(i, index)] = 0x80;
}

/*
 * Rebuilds the plaintext stored for SIMD lane `index` and returns it as a
 * NUL-terminated string.  The result lives in the static buffer `out` and
 * is overwritten by the next call.
 *
 * The character count is recovered from the per-lane field of total_len
 * (see set_key), and only the low byte of each stored 16-bit unit is kept.
 */
static char *get_key(int index)
{
	unsigned int s, i;

#if (MMX_COEF == 4)
	s = (total_len >> (1+((32/MMX_COEF)*(index)))) & 0xff;
#else
	if(index == 0)
		s = (total_len & 0xffff) >> 1 ;
	else
		s = total_len >> 17;
#endif
	/* Never write past `out`: keep one byte for the terminator. */
	if (s > sizeof(out) - 1)
		s = sizeof(out) - 1;

	for(i=0;i<s;i++)
		out[i] = ((unsigned short *)saved_plain)[ GETPOS(i, index) ];
	out[i]=0;

	/* `out` is unsigned char; convert explicitly to the declared type. */
	return (char *)out;
}

/*
 * Format descriptor exported to the rest of John the Ripper.  The
 * initializer is positional, so its order has to follow struct fmt_main as
 * declared in formats.h (not visible here): first a block of parameters,
 * then a block of method pointers.
 */
struct fmt_main fmt_NTmmx = {
	{
		/* Names, benchmark settings and size limits defined above. */
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		SALT_SIZE,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT,
		tests
	}, {
		/* fmt_default_* entries are generic helpers from outside this file. */
		fmt_default_init,
		valid,
		fmt_default_split,
		get_binary,
		fmt_default_salt,
		{
			/* Hashes of a loaded binary: 4, 8 and 12 bits wide. */
			binary_hash_0,
			binary_hash_1,
			binary_hash_2
		},
		fmt_default_salt_hash,
		set_salt,
		set_key,
		get_key,
		fmt_default_clear_keys,
		crypt_all,
		{
			/* The same hashes, taken from a computed digest. */
			get_hash_0,
			get_hash_1,
			get_hash_2
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};


// extern int mdfourmmx(unsigned char *out, unsigned char *in, int n) __attribute__((regparm(3)));

#ifdef UNDERSCORES
#define mdfourmmx	_mdfourmmx
#endif

.globl mdfourmmx

.data
.align(16)
/*
 * MD4 initial chaining values A, B, C and D.  Every constant is stored
 * twice, once per 32-bit lane of a 64-bit MMX register.
 */
const_init_a:
.long 0x67452301
.long 0x67452301
const_init_b:
.long 0xefcdab89
.long 0xefcdab89
const_init_c:
.long 0x98badcfe
.long 0x98badcfe
const_init_d:
.long 0x10325476
.long 0x10325476

/* Additive constants for round 2 and round 3, again one copy per lane. */
const_stage2:
.long 0x5a827999
.long 0x5a827999
const_stage3:
.long 0x6ed9eba1
.long 0x6ed9eba1

/* Scratch area of 64 bytes per lane; not referenced by the code below. */
.align(16)
buffer:
.zero(64*2)

/* MD4 state words a..d, each holding two lanes. */
#define ctxa %mm0
#define ctxb %mm1
#define ctxc %mm2
#define ctxd %mm3
/*
 * Scratch registers.  tmp1 receives the result of F/G/H, tmp2 is used
 * inside G, tmp3 holds the right-shifted half of the rotate and tmp4 holds
 * the current round constant for STEP2.
 */
#define tmp1 %mm4
#define tmp2 %mm5
#define tmp3 %mm6
#define tmp4 %mm7


/* C equivalent of F, which leaves its result in tmp1: */
//#define F_MMX(x, y, z)			(z ^ (x & (y ^ z)))

#define F(x,y,z) \
	movq y, tmp1; \
	pxor z, tmp1; \
	pand x, tmp1; \
	pxor z, tmp1

/* C equivalent of G (result in tmp1, clobbers tmp2): */
//#define G_MMX(x, y, z)			((x & (y | z)) | (y & z))

#define G(x,y,z) \
	movq y, tmp1; \
	movq y, tmp2; \
	por z, tmp1; \
	pand z, tmp2; \
	pand x, tmp1; \
	por tmp2, tmp1

/* C equivalent of H (result in tmp1): */
//#define H_MMX(x, y, z)			(x ^ y ^ z)
#define H(x,y,z) \
	movq x, tmp1; \
	pxor y, tmp1; \
	pxor z, tmp1

/*
 * C equivalent of one MD4 step.  STEP1 adds f(b,c,d) and input word x to a
 * and rotates a left by s bits; STEP2 additionally adds the round constant
 * held in tmp4.  Input word x of both lanes is read from (x*8)(%edx).
 */
//#define STEP_MMX(f, a, b, c, d, x, s) \
//	(a) += f((b), (c), (d)) + (x); \
//	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));

#define STEP1(f, a, b, c, d, x, s) \
	f(b, c, d); \
	paddd (x*8)(%edx), tmp1; \
	paddd tmp1, a; \
	movq a, tmp3; \
	pslld $s, a; \
	psrld $(32-s), tmp3; \
	por tmp3, a

#define STEP2(f, a, b, c, d, x, s) \
	f(b, c, d); \
	paddd (x*8)(%edx), tmp1; \
	paddd tmp4, tmp1; \
	paddd tmp1, a; \
	movq a, tmp3; \
	pslld $s, a; \
	psrld $(32-s), tmp3; \
	por tmp3, a


.text
/*
 * mdfourmmx: MD4 of two single-block messages at once, using MMX.
 *
 * Arguments are passed in registers (regparm(3)):
 *   %eax  out: receives the four state words, two lanes each (32 bytes)
 *   %edx  in:  two interleaved 64-byte blocks; word w of lane l is at
 *              offset w*8 + l*4.  Word 14 of each lane is overwritten here
 *              with the message length in bits.
 *   %ecx  n:   packed lengths in bytes, lane 0 in bits 0..15 and lane 1 in
 *              bits 16..31
 *
 * Returns the low 32 bits of state word a (lane 0) in %eax.
 */

mdfourmmx:
	/*
	 * %ebx is used as scratch below.  It is a callee-saved register in
	 * the i386 calling convention, so preserve it for the caller.
	 */
	push %ebx

	/* Convert the packed byte lengths to bit lengths and split them. */
	shl $3, %ecx
	mov %ecx, %ebx
	and $0xffff, %ecx
	shrl $16,  %ebx
	// %ecx holds the bit length of the first password
	// %ebx holds the bit length of the second one
	mov %ecx, (14*8)(%edx)
	mov %ebx, (14*8+4)(%edx)

	pop %ebx

	/* MD4 init */
	movq const_init_a, ctxa
	movq const_init_b, ctxb
	movq const_init_c, ctxc
	movq const_init_d, ctxd

	/* Round 1 */
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 1, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 2, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 3, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 4, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 5, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 6, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 7, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 8, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 9, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 10, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 11, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 12, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 13, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 14, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 15, 19)

	/* Round 2 */
	movq const_stage2, tmp4

	STEP2(G, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 4, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 8, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 12, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 1, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 5, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 9, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 13, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 2, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 6, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 10, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 14, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 3, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 7, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 11, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 15, 13)

	/* Round 3 */
	movq const_stage3, tmp4

	STEP2(H, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 8, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 4, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 12, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 2, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 10, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 6, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 14, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 1, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 9, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 5, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 13, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 3, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 11, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 7, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 15, 15)

	/* Add the initial values back in to finish the block. */
	paddd const_init_a, ctxa
	paddd const_init_b, ctxb
	paddd const_init_c, ctxc
	paddd const_init_d, ctxd

	/* Store a, b, c and d for both lanes. */
	movq ctxa, 0(%eax)
	movq ctxb, 8(%eax)
	movq ctxc, 16(%eax)
	movq ctxd, 24(%eax)

	/* Return value, then leave MMX state so x87 code can run again. */
	movd ctxa, %eax
	emms

	ret



// extern int mdfoursse2(unsigned char *out, unsigned char *in, int n) __attribute__((regparm(3)));

#ifdef UNDERSCORES
#define mdfoursse2	_mdfoursse2
#endif

.globl mdfoursse2

.data
.align(16)
/*
 * MD4 initial chaining values A, B, C and D.  Every constant is stored
 * four times, once per 32-bit lane of a 128-bit XMM register.
 */
const_init_a:
.long 0x67452301
.long 0x67452301
.long 0x67452301
.long 0x67452301
const_init_b:
.long 0xefcdab89
.long 0xefcdab89
.long 0xefcdab89
.long 0xefcdab89
const_init_c:
.long 0x98badcfe
.long 0x98badcfe
.long 0x98badcfe
.long 0x98badcfe
const_init_d:
.long 0x10325476
.long 0x10325476
.long 0x10325476
.long 0x10325476

/* Additive constants for round 2 and round 3, again one copy per lane. */
const_stage2:
.long 0x5a827999
.long 0x5a827999
.long 0x5a827999
.long 0x5a827999
const_stage3:
.long 0x6ed9eba1
.long 0x6ed9eba1
.long 0x6ed9eba1
.long 0x6ed9eba1

/* Scratch area of 64 bytes per lane; not referenced by the code below. */
.align(16)
buffer:
.zero(64*4)

/* MD4 state words a..d, each holding four lanes. */
#define ctxa %xmm0
#define ctxb %xmm1
#define ctxc %xmm2
#define ctxd %xmm3
/*
 * Scratch registers.  tmp1 receives the result of F/G/H, tmp2 is used
 * inside G, tmp3 holds the right-shifted half of the rotate and tmp4 holds
 * the current round constant for STEP2.
 */
#define tmp1 %xmm4
#define tmp2 %xmm5
#define tmp3 %xmm6
#define tmp4 %xmm7


/* C equivalent of F, which leaves its result in tmp1: */
//#define F_MMX(x, y, z)			(z ^ (x & (y ^ z)))

#define F(x,y,z) \
	movapd y, tmp1; \
	pxor z, tmp1; \
	pand x, tmp1; \
	pxor z, tmp1

/* C equivalent of G (result in tmp1, clobbers tmp2): */
//#define G_MMX(x, y, z)			((x & (y | z)) | (y & z))

#define G(x,y,z) \
	movapd y, tmp1; \
	movapd y, tmp2; \
	por z, tmp1; \
	pand z, tmp2; \
	pand x, tmp1; \
	por tmp2, tmp1

/* C equivalent of H (result in tmp1): */
//#define H_MMX(x, y, z)			(x ^ y ^ z)
#define H(x,y,z) \
	movapd x, tmp1; \
	pxor y, tmp1; \
	pxor z, tmp1

/*
 * C equivalent of one MD4 step.  STEP1 adds f(b,c,d) and input word x to a
 * and rotates a left by s bits; STEP2 additionally adds the round constant
 * held in tmp4.  Input word x of all four lanes is read from (x*16)(%edx).
 */
//#define STEP_MMX(f, a, b, c, d, x, s) \
//	(a) += f((b), (c), (d)) + (x); \
//	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));

#define STEP1(f, a, b, c, d, x, s) \
	f(b, c, d); \
	paddd (x*16)(%edx), tmp1; \
	paddd tmp1, a; \
	movapd a, tmp3; \
	pslld $s, a; \
	psrld $(32-s), tmp3; \
	por tmp3, a

#define STEP2(f, a, b, c, d, x, s) \
	f(b, c, d); \
	paddd (x*16)(%edx), tmp1; \
	paddd tmp4, tmp1; \
	paddd tmp1, a; \
	movapd a, tmp3; \
	pslld $s, a; \
	psrld $(32-s), tmp3; \
	por tmp3, a


.text
/*
 * mdfoursse2: MD4 of four single-block messages at once, using SSE2.
 *
 * Arguments are passed in registers (regparm(3)):
 *   %eax  out: receives the four state words, four lanes each (64 bytes)
 *   %edx  in:  four interleaved 64-byte blocks; word w of lane l is at
 *              offset w*16 + l*4.  Word 14 of each lane is overwritten
 *              here with the message length in bits.
 *   %ecx  n:   packed lengths in bytes, one byte per lane, lane 0 lowest
 *
 * Both buffers are accessed with aligned 128-bit moves.
 * Returns the low 32 bits of state word a (lane 0) in %eax.
 */

mdfoursse2:
	/*
	 * %ebx is used as scratch below.  It is a callee-saved register in
	 * the i386 calling convention, so preserve it for the caller.
	 */
	push %ebx

	/* Lane 0: low byte of n, converted from bytes to bits. */
	mov %ecx, %ebx
	shr $8, %ecx
	and $0xff, %ebx
	shl $3, %ebx
	mov %ebx, (14*16)(%edx)

	/* Lane 1 */
	mov %ecx, %ebx
	shr $8, %ecx
	and $0xff, %ebx
	shl $3, %ebx
	mov %ebx, (14*16+4)(%edx)

	/* Lane 2 */
	mov %ecx, %ebx
	shr $8, %ecx
	and $0xff, %ebx
	shl $3, %ebx
	mov %ebx, (14*16+8)(%edx)

	/* Lane 3 */
	and $0xff, %ecx
	shl $3, %ecx
	mov %ecx, (14*16+12)(%edx)

	pop %ebx

	/* MD4 init */
	movapd const_init_a, ctxa
	movapd const_init_b, ctxb
	movapd const_init_c, ctxc
	movapd const_init_d, ctxd

	/* Round 1 */
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 1, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 2, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 3, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 4, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 5, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 6, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 7, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 8, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 9, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 10, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 11, 19)
	STEP1(F, ctxa, ctxb, ctxc, ctxd, 12, 3)
	STEP1(F, ctxd, ctxa, ctxb, ctxc, 13, 7)
	STEP1(F, ctxc, ctxd, ctxa, ctxb, 14, 11)
	STEP1(F, ctxb, ctxc, ctxd, ctxa, 15, 19)

	/* Round 2 */
	movapd const_stage2, tmp4

	STEP2(G, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 4, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 8, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 12, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 1, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 5, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 9, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 13, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 2, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 6, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 10, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 14, 13)
	STEP2(G, ctxa, ctxb, ctxc, ctxd, 3, 3)
	STEP2(G, ctxd, ctxa, ctxb, ctxc, 7, 5)
	STEP2(G, ctxc, ctxd, ctxa, ctxb, 11, 9)
	STEP2(G, ctxb, ctxc, ctxd, ctxa, 15, 13)

	/* Round 3 */
	movapd const_stage3, tmp4

	STEP2(H, ctxa, ctxb, ctxc, ctxd, 0, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 8, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 4, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 12, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 2, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 10, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 6, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 14, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 1, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 9, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 5, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 13, 15)
	STEP2(H, ctxa, ctxb, ctxc, ctxd, 3, 3)
	STEP2(H, ctxd, ctxa, ctxb, ctxc, 11, 9)
	STEP2(H, ctxc, ctxd, ctxa, ctxb, 7, 11)
	STEP2(H, ctxb, ctxc, ctxd, ctxa, 15, 15)

	/* Add the initial values back in to finish the block. */
	paddd const_init_a, ctxa
	paddd const_init_b, ctxb
	paddd const_init_c, ctxc
	paddd const_init_d, ctxd

	/* Store a, b, c and d for all four lanes. */
	movapd ctxa, 0(%eax)
	movapd ctxb, 16(%eax)
	movapd ctxc, 32(%eax)
	movapd ctxd, 48(%eax)

	movd ctxa, %eax
	/*
	 * NOTE(review): no MMX register is touched in this routine, so this
	 * emms looks unnecessary; kept to leave behavior unchanged.
	 */
	emms

	ret


/*
 * This is an OpenSSL-compatible implementation of the RSA Data Security,
 * Inc. MD4 Message-Digest Algorithm (RFC 1320).
 *
 * Written by Solar Designer <solar at openwall.com> in 2005, and placed
 * in the public domain.  There's absolutely no warranty.
 *
 * This differs from Colin Plumb's older public domain implementation in
 * that no 32-bit integer data type is required, there's no compile-time
 * endianness configuration, and the function prototypes match OpenSSL's.
 * The primary goals are portability and ease of use.
 *
 * This implementation is meant to be fast, but not as fast as possible.
 * Some known optimizations are not included to reduce source code size
 * and avoid compile-time configuration.
 */

#ifndef HAVE_OPENSSL

#include <string.h>
#include "md4.h"

/*
 * The basic MD4 functions.
 *
 * F and G are optimized compared to their RFC 1320 definitions, with the
 * optimization for F borrowed from Colin Plumb's MD5 implementation.
 */
#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)			(((x) & ((y) | (z))) | ((y) & (z)))
#define H(x, y, z)			((x) ^ (y) ^ (z))

/*
 * The MD4 transformation for all three rounds.
 *
 * Adds f(b, c, d) and x to a, then rotates a left by s bits.  The mask
 * before the right shift keeps the rotate correct when the word type is
 * wider than 32 bits.  The expansion is two statements and already ends in
 * a semicolon, so it is invoked without one and must not be used as the
 * unbraced body of an if or a loop.
 */
#define STEP(f, a, b, c, d, x, s) \
	(a) += f((b), (c), (d)) + (x); \
	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));

/*
 * SET reads 4 input bytes in little-endian byte order and stores them
 * in a properly aligned word in host byte order.
 *
 * The check for little-endian architectures which tolerate unaligned
 * memory accesses is just an optimization.  Nothing will break if it
 * doesn't work.
 *
 * GET re-reads a word that SET has already produced: straight from the
 * input on the fast path, from ctx->block[] otherwise.  Both macros rely on
 * the locals `ptr` and `ctx` of the function that uses them.
 */
#if defined(__i386__) || defined(__vax__)
#define SET(n) \
	(*(MD4_u32plus *)&ptr[(n) * 4])
#define GET(n) \
	SET(n)
#else
#define SET(n) \
	(ctx->block[(n)] = \
	(MD4_u32plus)ptr[(n) * 4] | \
	((MD4_u32plus)ptr[(n) * 4 + 1] << 8) | \
	((MD4_u32plus)ptr[(n) * 4 + 2] << 16) | \
	((MD4_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
	(ctx->block[(n)])
#endif

/*
 * This processes one or more 64-byte data blocks, but does NOT update
 * the bit counters.  There're no alignment requirements.
 *
 * ctx supplies and receives the chaining values a..d; data points to the
 * input and size is its length in bytes.  size must be a non-zero multiple
 * of 64: the loop runs once before testing and only stops when size reaches
 * exactly zero.  Returns a pointer just past the last byte processed.
 */
static void *body(MD4_CTX *ctx, void *data, unsigned long size)
{
	unsigned char *ptr;
	MD4_u32plus a, b, c, d;
	MD4_u32plus saved_a, saved_b, saved_c, saved_d;

	ptr = data;

	/* Work on local copies of the state; written back after the loop. */
	a = ctx->a;
	b = ctx->b;
	c = ctx->c;
	d = ctx->d;

	do {
		/* Remember the state going in; it is added back after round 3. */
		saved_a = a;
		saved_b = b;
		saved_c = c;
		saved_d = d;

/* Round 1 */
		/* SET decodes each input word; rounds 2 and 3 re-read it with GET. */
		STEP(F, a, b, c, d, SET(0), 3)
		STEP(F, d, a, b, c, SET(1), 7)
		STEP(F, c, d, a, b, SET(2), 11)
		STEP(F, b, c, d, a, SET(3), 19)
		STEP(F, a, b, c, d, SET(4), 3)
		STEP(F, d, a, b, c, SET(5), 7)
		STEP(F, c, d, a, b, SET(6), 11)
		STEP(F, b, c, d, a, SET(7), 19)
		STEP(F, a, b, c, d, SET(8), 3)
		STEP(F, d, a, b, c, SET(9), 7)
		STEP(F, c, d, a, b, SET(10), 11)
		STEP(F, b, c, d, a, SET(11), 19)
		STEP(F, a, b, c, d, SET(12), 3)
		STEP(F, d, a, b, c, SET(13), 7)
		STEP(F, c, d, a, b, SET(14), 11)
		STEP(F, b, c, d, a, SET(15), 19)

/* Round 2 */
		STEP(G, a, b, c, d, GET(0) + 0x5a827999, 3)
		STEP(G, d, a, b, c, GET(4) + 0x5a827999, 5)
		STEP(G, c, d, a, b, GET(8) + 0x5a827999, 9)
		STEP(G, b, c, d, a, GET(12) + 0x5a827999, 13)
		STEP(G, a, b, c, d, GET(1) + 0x5a827999, 3)
		STEP(G, d, a, b, c, GET(5) + 0x5a827999, 5)
		STEP(G, c, d, a, b, GET(9) + 0x5a827999, 9)
		STEP(G, b, c, d, a, GET(13) + 0x5a827999, 13)
		STEP(G, a, b, c, d, GET(2) + 0x5a827999, 3)
		STEP(G, d, a, b, c, GET(6) + 0x5a827999, 5)
		STEP(G, c, d, a, b, GET(10) + 0x5a827999, 9)
		STEP(G, b, c, d, a, GET(14) + 0x5a827999, 13)
		STEP(G, a, b, c, d, GET(3) + 0x5a827999, 3)
		STEP(G, d, a, b, c, GET(7) + 0x5a827999, 5)
		STEP(G, c, d, a, b, GET(11) + 0x5a827999, 9)
		STEP(G, b, c, d, a, GET(15) + 0x5a827999, 13)

/* Round 3 */
		STEP(H, a, b, c, d, GET(0) + 0x6ed9eba1, 3)
		STEP(H, d, a, b, c, GET(8) + 0x6ed9eba1, 9)
		STEP(H, c, d, a, b, GET(4) + 0x6ed9eba1, 11)
		STEP(H, b, c, d, a, GET(12) + 0x6ed9eba1, 15)
		STEP(H, a, b, c, d, GET(2) + 0x6ed9eba1, 3)
		STEP(H, d, a, b, c, GET(10) + 0x6ed9eba1, 9)
		STEP(H, c, d, a, b, GET(6) + 0x6ed9eba1, 11)
		STEP(H, b, c, d, a, GET(14) + 0x6ed9eba1, 15)
		STEP(H, a, b, c, d, GET(1) + 0x6ed9eba1, 3)
		STEP(H, d, a, b, c, GET(9) + 0x6ed9eba1, 9)
		STEP(H, c, d, a, b, GET(5) + 0x6ed9eba1, 11)
		STEP(H, b, c, d, a, GET(13) + 0x6ed9eba1, 15)
		STEP(H, a, b, c, d, GET(3) + 0x6ed9eba1, 3)
		STEP(H, d, a, b, c, GET(11) + 0x6ed9eba1, 9)
		STEP(H, c, d, a, b, GET(7) + 0x6ed9eba1, 11)
		STEP(H, b, c, d, a, GET(15) + 0x6ed9eba1, 15)

		/* Feed-forward: add the state this block started with. */
		a += saved_a;
		b += saved_b;
		c += saved_c;
		d += saved_d;

		ptr += 64;
	} while (size -= 64);

	ctx->a = a;
	ctx->b = b;
	ctx->c = c;
	ctx->d = d;

	return ptr;
}

/*
 * Prepares ctx for a new message: zeroes the length counters and loads the
 * four MD4 initial chaining values.
 */
void MD4_Init(MD4_CTX *ctx)
{
	/* Nothing has been hashed yet. */
	ctx->hi = 0;
	ctx->lo = 0;

	/* Initial chaining values. */
	ctx->d = 0x10325476;
	ctx->c = 0x98badcfe;
	ctx->b = 0xefcdab89;
	ctx->a = 0x67452301;
}

/*
 * Feeds `size` bytes starting at `data` into the hash.
 *
 * Complete 64-byte blocks are processed immediately by body(); any
 * remainder is kept in ctx->buffer until more data arrives or MD4_Final()
 * is called.  ctx->lo holds the low 29 bits of the running byte count and
 * ctx->hi accumulates everything above them.
 */
void MD4_Update(MD4_CTX *ctx, void *data, unsigned long size)
{
	MD4_u32plus prev_lo;
	unsigned long filled, room;

	/* Advance the byte counter, carrying into hi when lo wraps. */
	prev_lo = ctx->lo;
	ctx->lo = (prev_lo + size) & 0x1fffffff;
	if (ctx->lo < prev_lo)
		ctx->hi++;
	ctx->hi += size >> 29;

	/* Bytes already waiting in the buffer from earlier calls. */
	filled = prev_lo & 0x3f;

	if (filled != 0) {
		room = 64 - filled;

		/* Still not a full block: stash the data and wait for more. */
		if (size < room) {
			memcpy(&ctx->buffer[filled], data, size);
			return;
		}

		/* Top the buffer up to one full block and process it. */
		memcpy(&ctx->buffer[filled], data, room);
		body(ctx, ctx->buffer, 64);

		data = (unsigned char *)data + room;
		size -= room;
	}

	/* Hash all remaining whole blocks straight from the caller's memory. */
	if (size >= 64) {
		data = body(ctx, data, size & ~(unsigned long)0x3f);
		size &= 0x3f;
	}

	/* Keep the tail (possibly empty) for the next call. */
	memcpy(ctx->buffer, data, size);
}

/*
 * Finishes the hash: appends the 0x80 marker, zero padding and the 64-bit
 * little-endian bit count, processes the last block(s) and writes the
 * 16-byte digest to `result`.  The context is wiped afterwards and must be
 * re-initialized with MD4_Init() before it is used again.
 *
 * The length and the digest are always stored byte by byte.  The former
 * ARCH_LITTLE_ENDIAN shortcut cast the byte buffers to unsigned long *,
 * which overruns both ctx->buffer and `result` when unsigned long is wider
 * than 32 bits, assumes suitable alignment, and was selected by the macro
 * merely being defined rather than by its value.  The byte-wise stores give
 * the same bytes on every platform.
 */
void MD4_Final(unsigned char *result, MD4_CTX *ctx)
{
	unsigned long used, free;

	used = ctx->lo & 0x3f;

	ctx->buffer[used++] = 0x80;

	free = 64 - used;

	/* No room left for the 8-byte length: pad out and flush this block. */
	if (free < 8) {
		memset(&ctx->buffer[used], 0, free);
		body(ctx, ctx->buffer, 64);
		used = 0;
		free = 64;
	}

	memset(&ctx->buffer[used], 0, free - 8);

	/* Convert the byte count to bits; hi already holds the upper part. */
	ctx->lo <<= 3;
	ctx->buffer[56] = ctx->lo;
	ctx->buffer[57] = ctx->lo >> 8;
	ctx->buffer[58] = ctx->lo >> 16;
	ctx->buffer[59] = ctx->lo >> 24;
	ctx->buffer[60] = ctx->hi;
	ctx->buffer[61] = ctx->hi >> 8;
	ctx->buffer[62] = ctx->hi >> 16;
	ctx->buffer[63] = ctx->hi >> 24;

	body(ctx, ctx->buffer, 64);

	/* Emit a, b, c and d in little-endian byte order. */
	result[0] = ctx->a;
	result[1] = ctx->a >> 8;
	result[2] = ctx->a >> 16;
	result[3] = ctx->a >> 24;
	result[4] = ctx->b;
	result[5] = ctx->b >> 8;
	result[6] = ctx->b >> 16;
	result[7] = ctx->b >> 24;
	result[8] = ctx->c;
	result[9] = ctx->c >> 8;
	result[10] = ctx->c >> 16;
	result[11] = ctx->c >> 24;
	result[12] = ctx->d;
	result[13] = ctx->d >> 8;
	result[14] = ctx->d >> 16;
	result[15] = ctx->d >> 24;

	/* Do not leave message-dependent state behind. */
	memset(ctx, 0, sizeof(*ctx));
}

/*
 * One-shot convenience wrapper (added by bartavelle at bandecon.com):
 * computes the MD4 digest of the n bytes at `in` and stores the 16-byte
 * result at `out`.
 */
void mdfour(unsigned char *out, unsigned char *in, int n)
{
	MD4_CTX state;

	MD4_Init(&state);
	MD4_Update(&state, in, n);
	MD4_Final(out, &state);
}

#endif

/*
 * This is an OpenSSL-compatible implementation of the RSA Data Security,
 * Inc. MD4 Message-Digest Algorithm (RFC 1320).
 *
 * Written by Solar Designer <solar at openwall.com> in 2005, and placed
 * in the public domain.  See md4.c for more information.
 */

/* Use OpenSSL's MD4 when available, otherwise declare the bundled one. */
#ifdef HAVE_OPENSSL
#include <openssl/md4.h>
#elif !defined(_MD4_H)
#define _MD4_H

/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned long MD4_u32plus;

/*
 * Hashing state:
 *   lo, hi   running message length (see MD4_Update / MD4_Final in md4.c)
 *   a..d     the four chaining values
 *   buffer   input bytes not yet forming a complete 64-byte block
 *   block    decoded input words, used on platforms without the
 *            direct-read fast path in md4.c
 */
typedef struct {
	MD4_u32plus lo, hi;
	MD4_u32plus a, b, c, d;
	unsigned char buffer[64];
	MD4_u32plus block[16];
} MD4_CTX;

/* OpenSSL-style streaming interface: init, feed data, fetch the digest. */
extern void MD4_Init(MD4_CTX *ctx);
extern void MD4_Update(MD4_CTX *ctx, void *data, unsigned long size);
extern void MD4_Final(unsigned char *result, MD4_CTX *ctx);

//added by bartavelle
/* One-shot MD4 of n bytes at in; writes the digest to out. */
extern void mdfour(unsigned char *out, unsigned char *in, int n);

/*
 * Assembly implementations, selected by MMX_COEF (which must be defined
 * before this header is included).  They take their arguments in registers
 * (regparm(3)); n carries the packed key lengths.
 */
#if (MMX_COEF == 2)
extern int mdfourmmx(unsigned char *out, unsigned char *in, int n) __attribute__((regparm(3)));
#endif

#if (MMX_COEF == 4)
extern int mdfoursse2(unsigned char *out, unsigned char *in, int n) __attribute__((regparm(3)));
#endif

#endif

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ