diff -urp john-1.7.6.orig/src/BSDI_fmt.c john-1.7.6-omp-des-2/src/BSDI_fmt.c --- john-1.7.6.orig/src/BSDI_fmt.c 2010-01-16 17:13:35 +0000 +++ john-1.7.6-omp-des-2/src/BSDI_fmt.c 2010-06-27 16:27:58 +0000 @@ -45,8 +45,8 @@ static struct fmt_tests tests[] = { #define BINARY_SIZE ARCH_SIZE #define SALT_SIZE (ARCH_SIZE * 2) -#define MIN_KEYS_PER_CRYPT DES_BS_DEPTH -#define MAX_KEYS_PER_CRYPT DES_BS_DEPTH +#define MIN_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) +#define MAX_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) #else diff -urp john-1.7.6.orig/src/DES_bs.c john-1.7.6-omp-des-2/src/DES_bs.c --- john-1.7.6.orig/src/DES_bs.c 2010-06-09 14:54:26 +0000 +++ john-1.7.6-omp-des-2/src/DES_bs.c 2010-06-27 17:10:52 +0000 @@ -3,7 +3,13 @@ * Copyright (c) 1996-2002,2005,2010 by Solar Designer */ +/* cmp_all() does too little work per call to be worth the overhead */ +#undef CMP_ALL_PARA + #include <string.h> +#ifdef CMP_ALL_PARA +#include <signal.h> /* for sig_atomic_t */ +#endif #include "arch.h" #include "common.h" @@ -14,7 +20,10 @@ #define DEPTH [depth] #define START [0] #define init_depth() \ + DES_bs_combined *tp; \ int depth; \ + tp = &DES_bs_all[index / DES_BS_DEPTH]; \ + index %= DES_BS_DEPTH; \ depth = index >> ARCH_BITS_LOG; \ index &= (ARCH_BITS - 1); #define for_each_depth() \ @@ -22,12 +31,13 @@ #else #define DEPTH #define START -#define init_depth() +#define init_depth() \ + DES_bs_combined *tp = &DES_bs_all[index / DES_BS_DEPTH]; #define for_each_depth() #endif #if !DES_BS_ASM -DES_bs_combined CC_CACHE_ALIGN DES_bs_all; +DES_bs_combined CC_CACHE_ALIGN DES_bs_all[DES_bs_mt]; #endif static unsigned char DES_LM_KP[56] = { @@ -55,20 +65,24 @@ void DES_bs_init(int LM) int round, index, bit; int p, q, s; int c; + DES_bs_combined *tp; - DES_bs_all.KS_updates = 0; + for_each_t() + tp->KS_updates = 0; if (LM) DES_bs_clear_keys_LM(); else DES_bs_clear_keys(); +for_each_t() { + #if DES_BS_EXPAND if (LM) - k = DES_bs_all.KS.p; + k = tp->KS.p; else - k = DES_bs_all.KSp; + k = tp->KSp; #else - k = 
DES_bs_all.KS.p; + k = tp->KS.p; #endif s = 0; @@ -84,7 +98,7 @@ void DES_bs_init(int LM) bit -= bit >> 3; bit = 55 - bit; if (LM) bit = DES_LM_KP[bit]; - *k++ = &DES_bs_all.K[bit] START; + *k++ = &tp->K[bit] START; } } @@ -92,42 +106,46 @@ void DES_bs_init(int LM) * non-zero bit in the index. */ for (bit = 0; bit <= 7; bit++) for (index = 1 << bit; index < 0x100; index += 2 << bit) - DES_bs_all.s1[index] = bit + 1; + tp->s1[index] = bit + 1; /* Special case: instead of doing an extra check in *_set_key*(), we * might overrun into DES_bs_all.B, which is harmless as long as the * order of fields is unchanged. 57 is the smallest value to guarantee * we'd be past the end of K[] since we start at -1. */ - DES_bs_all.s1[0] = 57; + tp->s1[0] = 57; /* The same for second bits */ for (index = 0; index < 0x100; index++) { - bit = DES_bs_all.s1[index]; - bit += DES_bs_all.s1[index >> bit]; - DES_bs_all.s2[index] = (bit <= 8) ? bit : 57; + bit = tp->s1[index]; + bit += tp->s1[index >> bit]; + tp->s2[index] = (bit <= 8) ? 
bit : 57; } /* Convert to byte offsets */ for (index = 0; index < 0x100; index++) - DES_bs_all.s1[index] *= sizeof(DES_bs_vector); + tp->s1[index] *= sizeof(DES_bs_vector); if (LM) { for (c = 0; c < 0x100; c++) if (c >= 'a' && c <= 'z') - DES_bs_all.E.extras.u[c] = c & ~0x20; + tp->E.extras.u[c] = c & ~0x20; else - DES_bs_all.E.extras.u[c] = c; + tp->E.extras.u[c] = c; } #if DES_BS_ASM DES_bs_init_asm(); #elif defined(__MMX__) || defined(__SSE2__) - memset(&DES_bs_all.ones, -1, sizeof(DES_bs_all.ones)); + memset(tp->ones, -1, sizeof(tp->ones)); #endif +} /*t*/ } void DES_bs_set_salt(ARCH_WORD salt) { + DES_bs_combined *tp; + +for_each_t() { ARCH_WORD mask; int src, dst; @@ -139,48 +157,59 @@ void DES_bs_set_salt(ARCH_WORD salt) if (dst < 24) src = dst + 24; else src = dst - 24; } else src = dst; - DES_bs_all.E.E[dst] = &DES_bs_all.B[DES_E[src]] START; - DES_bs_all.E.E[dst + 48] = &DES_bs_all.B[DES_E[src] + 32] START; + tp->E.E[dst] = &tp->B[DES_E[src]] START; + tp->E.E[dst + 48] = &tp->B[DES_E[src] + 32] START; mask <<= 1; } } +} void DES_bs_clear_keys(void) { - if (DES_bs_all.KS_updates++ & 0xFFF) return; - DES_bs_all.KS_updates = 1; - memset(DES_bs_all.K, 0, sizeof(DES_bs_all.K)); - memset(DES_bs_all.keys, 0, sizeof(DES_bs_all.keys)); - DES_bs_all.keys_changed = 1; + DES_bs_combined *tp; + +for_each_t() { + if (tp->KS_updates++ & 0xFFF) continue; + tp->KS_updates = 1; + memset(tp->K, 0, sizeof(tp->K)); + memset(tp->keys, 0, sizeof(tp->keys)); + tp->keys_changed = 1; +} } void DES_bs_clear_keys_LM(void) { - if (DES_bs_all.KS_updates++ & 0xFFF) return; - DES_bs_all.KS_updates = 1; - memset(DES_bs_all.K, 0, sizeof(DES_bs_all.K)); + DES_bs_combined *tp; + +for_each_t() { + if (tp->KS_updates++ & 0xFFF) continue; + tp->KS_updates = 1; + memset(tp->K, 0, sizeof(tp->K)); #if !DES_BS_VECTOR && ARCH_BITS >= 64 - memset(DES_bs_all.E.extras.keys, 0, sizeof(DES_bs_all.E.extras.keys)); + memset(tp->E.extras.keys, 0, sizeof(tp->E.extras.keys)); #else - memset(DES_bs_all.keys, 
0, sizeof(DES_bs_all.keys)); + memset(tp->keys, 0, sizeof(tp->keys)); #endif } +} void DES_bs_set_key(char *key, int index) { /* new is NUL-terminated, but not NUL-padded to any length; * old is NUL-padded to 8 characters, but not always NUL-terminated. */ unsigned char *new = (unsigned char *)key; - unsigned char *old = DES_bs_all.keys[index]; + unsigned char *old; DES_bs_vector *k, *kbase; ARCH_WORD mask; unsigned int xor, s1, s2; + old = DES_bs_all[index / DES_BS_DEPTH].keys[index % DES_BS_DEPTH]; + init_depth(); mask = (ARCH_WORD)1 << index; - k = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH - 1; + k = (DES_bs_vector *)&tp->K[0] DEPTH - 1; #if ARCH_ALLOWS_UNALIGNED if (*(ARCH_WORD_32 *)new == *(ARCH_WORD_32 *)old && old[sizeof(ARCH_WORD_32)]) { @@ -189,22 +218,22 @@ void DES_bs_set_key(char *key, int index k += sizeof(ARCH_WORD_32) * 7; } #endif - while (*new && k < &DES_bs_all.K[55]) { + while (*new && k < &tp->K[55]) { kbase = k; if ((xor = *new ^ *old)) { xor &= 0x7F; /* Note: this might result in xor == 0 */ *old = *new; do { - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; *(ARCH_WORD *)((char *)k + s1) ^= mask; if (s2 > 8) break; /* Required for xor == 0 */ xor >>= s2; k[s2] START ^= mask; k += s2; if (!xor) break; - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; @@ -217,21 +246,21 @@ void DES_bs_set_key(char *key, int index k = kbase + 7; } - while (*old && k < &DES_bs_all.K[55]) { + while (*old && k < &tp->K[55]) { kbase = k; xor = *old & 0x7F; /* Note: this might result in xor == 0 */ *old++ = 0; do { - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; *(ARCH_WORD *)((char *)k + s1) ^= mask; if (s2 > 8) break; /* Required for xor == 0 */ xor >>= s2; k[s2] START ^= mask; k += s2; if (!xor) break; - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = 
tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; @@ -241,7 +270,7 @@ void DES_bs_set_key(char *key, int index k = kbase + 7; } - DES_bs_all.keys_changed = 1; + tp->keys_changed = 1; } void DES_bs_set_key_LM(char *key, int index) @@ -249,20 +278,22 @@ void DES_bs_set_key_LM(char *key, int in /* new is NUL-terminated, but not NUL-padded to any length; * old is NUL-padded to 7 characters and NUL-terminated. */ unsigned char *new = (unsigned char *)key; -#if !DES_BS_VECTOR && ARCH_BITS >= 64 - unsigned char *old = DES_bs_all.E.extras.keys[index]; -#else - unsigned char *old = DES_bs_all.keys[index]; -#endif + unsigned char *old; DES_bs_vector *k, *kbase; ARCH_WORD mask; unsigned int xor, s1, s2; unsigned char plain; +#if !DES_BS_VECTOR && ARCH_BITS >= 64 + old = DES_bs_all[index / DES_BS_DEPTH].E.extras.keys[index % DES_BS_DEPTH]; +#else + old = DES_bs_all[index / DES_BS_DEPTH].keys[index % DES_BS_DEPTH]; +#endif + init_depth(); mask = (ARCH_WORD)1 << index; - k = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH - 1; + k = (DES_bs_vector *)&tp->K[0] DEPTH - 1; #if ARCH_ALLOWS_UNALIGNED if (*(ARCH_WORD_32 *)new == *(ARCH_WORD_32 *)old && old[sizeof(ARCH_WORD_32)]) { @@ -271,21 +302,21 @@ void DES_bs_set_key_LM(char *key, int in k += sizeof(ARCH_WORD_32) * 8; } #endif - while (*new && k < &DES_bs_all.K[55]) { - plain = DES_bs_all.E.extras.u[ARCH_INDEX(*new)]; + while (*new && k < &tp->K[55]) { + plain = tp->E.extras.u[ARCH_INDEX(*new)]; kbase = k; if ((xor = plain ^ *old)) { *old = plain; do { - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; k += s2; if (!xor) break; - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; @@ -303,15 +334,15 @@ void DES_bs_set_key_LM(char *key, int in xor = *old; 
*old++ = 0; do { - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; k += s2; if (!xor) break; - s1 = DES_bs_all.s1[xor]; - s2 = DES_bs_all.s2[xor]; + s1 = tp->s1[xor]; + s2 = tp->s2[xor]; xor >>= s2; *(ARCH_WORD *)((char *)k + s1) ^= mask; k[s2] START ^= mask; @@ -322,25 +353,34 @@ void DES_bs_set_key_LM(char *key, int in } } -#if DES_BS_EXPAND +#if DES_BS_EXPAND && !DES_BS_EXPAND_MERGED void DES_bs_expand_keys(void) { + DES_bs_combined *tp; + +#if 0 +/* it may be unreasonable to parallelize this */ +#pragma omp parallel for default(shared) private(tp) +#endif + +for_each_t() { int index; #if DES_BS_VECTOR int depth; #endif - if (!DES_bs_all.keys_changed) return; + if (!tp->keys_changed) continue; for (index = 0; index < 0x300; index++) for_each_depth() #if DES_BS_VECTOR - DES_bs_all.KS.v[index] DEPTH = DES_bs_all.KSp[index] DEPTH; + tp->KS.v[index] DEPTH = tp->KSp[index] DEPTH; #else - DES_bs_all.KS.v[index] = *DES_bs_all.KSp[index]; + tp->KS.v[index] = *tp->KSp[index]; #endif - DES_bs_all.keys_changed = 0; + tp->keys_changed = 0; +} } #endif @@ -387,7 +427,7 @@ int DES_bs_get_hash(int index, int count DES_bs_vector *b; init_depth(); - b = (DES_bs_vector *)&DES_bs_all.B[0] DEPTH; + b = (DES_bs_vector *)&tp->B[0] DEPTH; result = (b[0] START >> index) & 1; result |= ((b[1] START >> index) & 1) << 1; @@ -428,6 +468,13 @@ int DES_bs_get_hash(int index, int count */ int DES_bs_cmp_all(ARCH_WORD *binary) { + DES_bs_combined *tp; +#ifdef CMP_ALL_PARA + sig_atomic_t retval = 0; + +#pragma omp parallel for default(shared) private(tp) +#endif +for_each_t() { ARCH_WORD value, mask; int bit; #if DES_BS_VECTOR @@ -437,7 +484,7 @@ int DES_bs_cmp_all(ARCH_WORD *binary) for_each_depth() { value = binary[0]; - b = (DES_bs_vector *)&DES_bs_all.B[0] DEPTH; + b = (DES_bs_vector *)&tp->B[0] DEPTH; mask = b[0] START ^ -(value & 1); mask |= b[1] START ^ -((value >> 1) & 
1); @@ -458,12 +505,22 @@ int DES_bs_cmp_all(ARCH_WORD *binary) b += 2; } +#ifdef CMP_ALL_PARA + retval = 1; + continue; /* have to let the rest of threads run anyway... */ +#else return 1; +#endif next_depth: ; } +} +#ifdef CMP_ALL_PARA + return retval; +#else return 0; +#endif } int DES_bs_cmp_one(ARCH_WORD *binary, int count, int index) @@ -472,7 +529,7 @@ int DES_bs_cmp_one(ARCH_WORD *binary, in DES_bs_vector *b; init_depth(); - b = (DES_bs_vector *)&DES_bs_all.B[0] DEPTH; + b = (DES_bs_vector *)&tp->B[0] DEPTH; for (bit = 0; bit < 31; bit++, b++) if (((b[0] START >> index) ^ (binary[0] >> bit)) & 1) return 0; diff -urp john-1.7.6.orig/src/DES_bs.h john-1.7.6-omp-des-2/src/DES_bs.h --- john-1.7.6.orig/src/DES_bs.h 2010-06-09 12:24:14 +0000 +++ john-1.7.6-omp-des-2/src/DES_bs.h 2010-06-27 17:13:56 +0000 @@ -69,9 +69,27 @@ typedef struct { int KS_updates; /* Key schedule updates counter */ int keys_changed; /* If keys have changed since last expand */ unsigned char keys[DES_BS_DEPTH][8]; /* Current keys */ + int gap[2]; /* XXX: SSE2 alignment hack for DES_bs_mt > 1 */ } DES_bs_combined; -extern DES_bs_combined DES_bs_all; +#if DES_BS_ASM +#define DES_bs_mt 1 +#else +#define DES_bs_mt 8 +#endif + +extern DES_bs_combined DES_bs_all[DES_bs_mt]; + +#define for_each_t() \ + for (tp = &DES_bs_all[0]; tp <= &DES_bs_all[DES_bs_mt - 1]; tp++) + +#if DES_BS_ASM +#define DES_BS_EXPAND_MERGED 0 +#else +/* optional, can set to 0 to have expand in DES_bs.c + * or to 1 to have it merged into "crypt body" in DES_bs_b.c */ +#define DES_BS_EXPAND_MERGED 0 +#endif /* * Initializes the internal structures. @@ -99,7 +117,7 @@ extern void DES_bs_set_key(char *key, in /* * Initializes the key schedule with actual key bits. Not for LM. 
*/ -#if DES_BS_EXPAND +#if DES_BS_EXPAND && !DES_BS_EXPAND_MERGED extern void DES_bs_expand_keys(void); #else #define DES_bs_expand_keys() diff -urp john-1.7.6.orig/src/DES_bs_b.c john-1.7.6-omp-des-2/src/DES_bs_b.c --- john-1.7.6.orig/src/DES_bs_b.c 2010-06-13 18:17:29 +0000 +++ john-1.7.6-omp-des-2/src/DES_bs_b.c 2010-06-27 17:07:24 +0000 @@ -8,6 +8,8 @@ #if !DES_BS_ASM #include "DES_bs.h" +#define _ones ((vtype *)DES_bs_all[0].ones) + #if defined(__ALTIVEC__) && DES_BS_DEPTH == 128 #undef DES_BS_VECTOR @@ -145,7 +147,7 @@ typedef __m128i vtype; _mm_xor_si128((a), (b)) #define vnot(dst, a) \ - (dst) = _mm_xor_si128((a), *(vtype *)&DES_bs_all.ones) + (dst) = _mm_xor_si128((a), *_ones) #define vand(dst, a, b) \ (dst) = _mm_and_si128((a), (b)) #define vor(dst, a, b) \ @@ -153,8 +155,7 @@ typedef __m128i vtype; #define vandn(dst, a, b) \ (dst) = _mm_andnot_si128((b), (a)) #define vxorn(dst, a, b) \ - (dst) = _mm_xor_si128(_mm_xor_si128((a), (b)), \ - *(vtype *)&DES_bs_all.ones) + (dst) = _mm_xor_si128(_mm_xor_si128((a), (b)), *_ones) #elif defined(__SSE2__) && defined(__MMX__) && DES_BS_DEPTH == 192 && \ !defined(DES_BS_NO_MMX) @@ -178,8 +179,8 @@ typedef struct { (dst).g = _mm_xor_si64((a).g, (b).g) #define vnot(dst, a) \ - (dst).f = _mm_xor_si128((a).f, ((vtype *)&DES_bs_all.ones)->f); \ - (dst).g = _mm_xor_si64((a).g, ((vtype *)&DES_bs_all.ones)->g) + (dst).f = _mm_xor_si128((a).f, _ones->f); \ + (dst).g = _mm_xor_si64((a).g, _ones->g) #define vand(dst, a, b) \ (dst).f = _mm_and_si128((a).f, (b).f); \ (dst).g = _mm_and_si64((a).g, (b).g) @@ -190,10 +191,8 @@ typedef struct { (dst).f = _mm_andnot_si128((b).f, (a).f); \ (dst).g = _mm_andnot_si64((b).g, (a).g) #define vxorn(dst, a, b) \ - (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), \ - (*(vtype *)&DES_bs_all.ones).f); \ - (dst).g = _mm_xor_si64(_mm_xor_si64((a).g, (b).g), \ - (*(vtype *)&DES_bs_all.ones).g); + (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), _ones->f); \ + (dst).g = 
_mm_xor_si64(_mm_xor_si64((a).g, (b).g), _ones->g); #elif defined(__SSE2__) && \ ((ARCH_BITS == 64 && DES_BS_DEPTH == 192) || \ @@ -217,7 +216,7 @@ typedef struct { (dst).g = (a).g ^ (b).g #define vnot(dst, a) \ - (dst).f = _mm_xor_si128((a).f, ((vtype *)&DES_bs_all.ones)->f); \ + (dst).f = _mm_xor_si128((a).f, _ones->f); \ (dst).g = ~(a).g #define vand(dst, a, b) \ (dst).f = _mm_and_si128((a).f, (b).f); \ @@ -229,8 +228,7 @@ typedef struct { (dst).f = _mm_andnot_si128((b).f, (a).f); \ (dst).g = (a).g & ~(b).g #define vxorn(dst, a, b) \ - (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), \ - (*(vtype *)&DES_bs_all.ones).f); \ + (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), _ones->f); \ (dst).g = ~((a).g ^ (b).g) #elif defined(__SSE2__) && defined(__MMX__) && \ @@ -259,8 +257,8 @@ typedef struct { (dst).h = (a).h ^ (b).h #define vnot(dst, a) \ - (dst).f = _mm_xor_si128((a).f, ((vtype *)&DES_bs_all.ones)->f); \ - (dst).g = _mm_xor_si64((a).g, ((vtype *)&DES_bs_all.ones)->g); \ + (dst).f = _mm_xor_si128((a).f, _ones->f); \ + (dst).g = _mm_xor_si64((a).g, _ones->g); \ (dst).h = ~(a).h #define vand(dst, a, b) \ (dst).f = _mm_and_si128((a).f, (b).f); \ @@ -275,10 +273,8 @@ typedef struct { (dst).g = _mm_andnot_si64((b).g, (a).g); \ (dst).h = (a).h & ~(b).h #define vxorn(dst, a, b) \ - (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), \ - (*(vtype *)&DES_bs_all.ones).f); \ - (dst).g = _mm_xor_si64(_mm_xor_si64((a).g, (b).g), \ - (*(vtype *)&DES_bs_all.ones).g); \ + (dst).f = _mm_xor_si128(_mm_xor_si128((a).f, (b).f), _ones->f); \ + (dst).g = _mm_xor_si64(_mm_xor_si64((a).g, (b).g), _ones->g); \ (dst).h = ~((a).h ^ (b).h) #elif defined(__MMX__) && ARCH_BITS != 64 && DES_BS_DEPTH == 64 @@ -292,7 +288,7 @@ typedef __m64 vtype; _mm_xor_si64((a), (b)) #define vnot(dst, a) \ - (dst) = _mm_xor_si64((a), *(vtype *)&DES_bs_all.ones) + (dst) = _mm_xor_si64((a), *_ones) #define vand(dst, a, b) \ (dst) = _mm_and_si64((a), (b)) #define vor(dst, a, b) \ @@ -300,8 +296,7 @@ 
typedef __m64 vtype; #define vandn(dst, a, b) \ (dst) = _mm_andnot_si64((b), (a)) #define vxorn(dst, a, b) \ - (dst) = _mm_xor_si64(_mm_xor_si64((a), (b)), \ - *(vtype *)&DES_bs_all.ones) + (dst) = _mm_xor_si64(_mm_xor_si64((a), (b)), *_ones) #elif defined(__MMX__) && ARCH_BITS == 32 && DES_BS_DEPTH == 96 #undef DES_BS_VECTOR @@ -322,7 +317,7 @@ typedef struct { (dst).g = (a).g ^ (b).g #define vnot(dst, a) \ - (dst).f = _mm_xor_si64((a).f, ((vtype *)&DES_bs_all.ones)->f); \ + (dst).f = _mm_xor_si64((a).f, _ones->f); \ (dst).g = ~(a).g #define vand(dst, a, b) \ (dst).f = _mm_and_si64((a).f, (b).f); \ @@ -334,8 +329,7 @@ typedef struct { (dst).f = _mm_andnot_si64((b).f, (a).f); \ (dst).g = (a).g & ~(b).g #define vxorn(dst, a, b) \ - (dst).f = _mm_xor_si64(_mm_xor_si64((a).f, (b).f), \ - (*(vtype *)&DES_bs_all.ones).f); \ + (dst).f = _mm_xor_si64(_mm_xor_si64((a).f, (b).f), _ones->f); \ (dst).g = ~((a).g ^ (b).g) #else @@ -398,8 +392,8 @@ typedef ARCH_WORD vtype; #include "nonstd.c" #endif -#define b DES_bs_all.B -#define e DES_bs_all.E.E +#define b tp->B +#define e tp->E.E #ifndef DES_BS_VECTOR #define DES_BS_VECTOR 0 @@ -456,12 +450,32 @@ typedef ARCH_WORD vtype; vst(b[i] bd, 7, v7); \ } +#if DES_BS_EXPAND_MERGED +#undef EK_FUNC +// #define EK_FUNC + +#ifdef EK_FUNC +static void expand_keys(DES_bs_combined *tp) +{ + int index; + for (index = 0; index < 0x300; index++) + for_each_depth() + vst(tp->KS.v[index] kd, 0, *(vtype *)tp->KSp[index] kd); + tp->keys_changed = 0; +} +#endif +#endif + #define x(p) vxorf(*(vtype *)&e[p] ed, *(vtype *)&k[p] kd) #define y(p, q) vxorf(*(vtype *)&b[p] bd, *(vtype *)&k[q] kd) #define z(r) ((vtype *)&b[r] bd) void DES_bs_crypt(int count) { + DES_bs_combined *tp; + +#pragma omp parallel for default(shared) private(tp) +for_each_t() { #if DES_BS_EXPAND DES_bs_vector *k; #else @@ -471,9 +485,21 @@ void DES_bs_crypt(int count) #if DES_BS_VECTOR int depth; #endif - #ifndef zero vtype zero; +#endif + +#if DES_BS_EXPAND && 
DES_BS_EXPAND_MERGED + if (tp->keys_changed) +#ifdef EK_FUNC + expand_keys(tp); +#else + goto expand_keys; +back_from_expand_keys: +#endif +#endif + +#ifndef zero /* This may produce an "uninitialized" warning */ vxor(zero, zero, zero); #endif @@ -481,9 +507,9 @@ void DES_bs_crypt(int count) DES_bs_clear_block(); #if DES_BS_EXPAND - k = DES_bs_all.KS.v; + k = tp->KS.v; #else - k = DES_bs_all.KS.p; + k = tp->KS.p; #endif rounds_and_swapped = 8; iterations = count; @@ -548,16 +574,37 @@ swap: k -= (0x300 + 48); rounds_and_swapped = 0x108; if (--iterations) goto swap; - return; + continue; next: k -= (0x300 - 48); rounds_and_swapped = 8; if (--iterations) goto start; + +#if DES_BS_EXPAND && DES_BS_EXPAND_MERGED +#ifndef EK_FUNC + continue; + +expand_keys: + { + int index; + for (index = 0; index < 0x300; index++) + for_each_depth() + vst(tp->KS.v[index] kd, 0, *(vtype *)tp->KSp[index] kd); + } + tp->keys_changed = 0; + goto back_from_expand_keys; +#endif +#endif +} } void DES_bs_crypt_25(void) { + DES_bs_combined *tp; + +#pragma omp parallel for default(shared) private(tp) +for_each_t() { #if DES_BS_EXPAND DES_bs_vector *k; #else @@ -567,9 +614,21 @@ void DES_bs_crypt_25(void) #if DES_BS_VECTOR int depth; #endif - #ifndef zero vtype zero; +#endif + +#if DES_BS_EXPAND && DES_BS_EXPAND_MERGED + if (tp->keys_changed) +#ifdef EK_FUNC + expand_keys(tp); +#else + goto expand_keys; +back_from_expand_keys: +#endif +#endif + +#ifndef zero /* This may produce an "uninitialized" warning */ vxor(zero, zero, zero); #endif @@ -577,9 +636,9 @@ void DES_bs_crypt_25(void) DES_bs_clear_block(); #if DES_BS_EXPAND - k = DES_bs_all.KS.v; + k = tp->KS.v; #else - k = DES_bs_all.KS.p; + k = tp->KS.p; #endif rounds_and_swapped = 8; iterations = 25; @@ -652,13 +711,28 @@ swap: k -= (0x300 + 48); rounds_and_swapped = 0x108; if (--iterations) goto swap; - return; + continue; next: k -= (0x300 - 48); rounds_and_swapped = 8; iterations--; goto start; + +#if DES_BS_EXPAND && DES_BS_EXPAND_MERGED 
+#ifndef EK_FUNC +expand_keys: + { + int index; + for (index = 0; index < 0x300; index++) + for_each_depth() + vst(tp->KS.v[index] kd, 0, *(vtype *)tp->KSp[index] kd); + } + tp->keys_changed = 0; + goto back_from_expand_keys; +#endif +#endif +} } #undef x @@ -672,11 +746,7 @@ next: void DES_bs_crypt_LM(void) { - ARCH_WORD **k; - int rounds; -#if DES_BS_VECTOR - int depth; -#endif + DES_bs_combined *tp; #ifndef zero vtype zero, ones; @@ -685,6 +755,14 @@ void DES_bs_crypt_LM(void) vnot(ones, zero); #endif +#pragma omp parallel for default(shared) private(tp) +for_each_t() { + ARCH_WORD **k; + int rounds; +#if DES_BS_VECTOR + int depth; +#endif + DES_bs_set_block_8(0, zero, zero, zero, zero, zero, zero, zero, zero); DES_bs_set_block_8(8, ones, ones, ones, zero, ones, zero, zero, zero); DES_bs_set_block_8(16, zero, zero, zero, zero, zero, zero, zero, ones); @@ -694,7 +772,7 @@ void DES_bs_crypt_LM(void) DES_bs_set_block_8(48, ones, ones, zero, zero, zero, zero, ones, zero); DES_bs_set_block_8(56, ones, zero, ones, zero, ones, ones, ones, ones); - k = DES_bs_all.KS.p; + k = tp->KS.p; rounds = 8; do { @@ -767,4 +845,5 @@ void DES_bs_crypt_LM(void) k += 96; } while (--rounds); } +} #endif diff -urp john-1.7.6.orig/src/DES_fmt.c john-1.7.6-omp-des-2/src/DES_fmt.c --- john-1.7.6.orig/src/DES_fmt.c 2010-01-16 17:05:46 +0000 +++ john-1.7.6-omp-des-2/src/DES_fmt.c 2010-06-27 16:27:52 +0000 @@ -38,8 +38,8 @@ static struct fmt_tests tests[] = { #define BINARY_SIZE ARCH_SIZE #define SALT_SIZE ARCH_SIZE -#define MIN_KEYS_PER_CRYPT DES_BS_DEPTH -#define MAX_KEYS_PER_CRYPT DES_BS_DEPTH +#define MIN_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) +#define MAX_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) #else @@ -323,7 +323,8 @@ static char *get_key(int index) static char out[PLAINTEXT_LENGTH + 1]; #if DES_BS - memcpy(out, DES_bs_all.keys[index], PLAINTEXT_LENGTH); + int t = index / DES_BS_DEPTH; index %= DES_BS_DEPTH; + memcpy(out, DES_bs_all[t].keys[index], PLAINTEXT_LENGTH); #else 
memcpy(out, buffer[index].key, PLAINTEXT_LENGTH); #endif diff -urp john-1.7.6.orig/src/LM_fmt.c john-1.7.6-omp-des-2/src/LM_fmt.c --- john-1.7.6.orig/src/LM_fmt.c 2010-01-16 17:16:20 +0000 +++ john-1.7.6-omp-des-2/src/LM_fmt.c 2010-06-26 21:43:01 +0000 @@ -41,8 +41,8 @@ static struct fmt_tests tests[] = { #define BINARY_SIZE ARCH_SIZE #define SALT_SIZE 0 -#define MIN_KEYS_PER_CRYPT DES_BS_DEPTH -#define MAX_KEYS_PER_CRYPT DES_BS_DEPTH +#define MIN_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) +#define MAX_KEYS_PER_CRYPT (DES_BS_DEPTH * DES_bs_mt) static void init(void) { @@ -175,9 +175,9 @@ static int cmp_exact(char *source, int i static char *get_key(int index) { #if !DES_BS_VECTOR && ARCH_BITS >= 64 - return (char *)DES_bs_all.E.extras.keys[index]; + return (char *)DES_bs_all[index / DES_BS_DEPTH].E.extras.keys[index % DES_BS_DEPTH]; #else - return (char *)DES_bs_all.keys[index]; + return (char *)DES_bs_all[index / DES_BS_DEPTH].keys[index % DES_BS_DEPTH]; #endif } diff -urp john-1.7.6.orig/src/Makefile john-1.7.6-omp-des-2/src/Makefile --- john-1.7.6.orig/src/Makefile 2010-06-13 21:12:37 +0000 +++ john-1.7.6-omp-des-2/src/Makefile 2010-06-26 23:51:52 +0000 @@ -16,7 +16,7 @@ NULL = /dev/null CPPFLAGS = -E OMPFLAGS = # gcc with OpenMP -#OMPFLAGS = -fopenmp +OMPFLAGS = -fopenmp # Sun Studio with OpenMP (set the OMP_NUM_THREADS env var at runtime) #OMPFLAGS = -xopenmp CFLAGS = -c -Wall -O2 -fomit-frame-pointer $(OMPFLAGS) diff -urp john-1.7.6.orig/src/params.h john-1.7.6-omp-des-2/src/params.h --- john-1.7.6.orig/src/params.h 2010-06-14 02:38:55 +0000 +++ john-1.7.6-omp-des-2/src/params.h 2010-06-27 15:01:28 +0000 @@ -15,7 +15,7 @@ /* * John's version number. 
*/ -#define JOHN_VERSION "1.7.6" +#define JOHN_VERSION "1.7.6-des-2" /* * Notes to packagers of John for *BSD "ports", Linux distributions, etc.: diff -urp john-1.7.6.orig/src/x86-64.h john-1.7.6-omp-des-2/src/x86-64.h --- john-1.7.6.orig/src/x86-64.h 2010-06-13 00:33:38 +0000 +++ john-1.7.6-omp-des-2/src/x86-64.h 2010-06-27 15:22:39 +0000 @@ -33,7 +33,8 @@ #define DES_EXTB 1 #define DES_COPY 0 #if defined(__SSE2__) && \ - ((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4) + (((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4) || \ + defined(_OPENMP)) #define DES_BS_ASM 0 #if 1 #define DES_BS_VECTOR 2