[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 19 May 2010 02:38:45 +0400
From: Solar Designer <solar@...nwall.com>
To: john-users@...ts.openwall.com
Subject: Re: C compiler generated SSE2 code
On Tue, May 18, 2010 at 01:49:46PM +0200, bartavelle@...quise.net wrote:
> Le 18/05/2010 01:32, Solar Designer a écrit :
> >Can you upload it to the wiki, please?
> >
> >http://openwall.info/wiki/john/patches
>
> Done, but I made a quick git patch.
Thank you! This works for now. When you update this code/patch, please
specify the copyright status and license for sse-intrinsics.c (new
source file added with the patch) and for your changes to MD5_fmt.c,
preferably like I have suggested here:
http://openwall.info/wiki/john/licensing
> http://bigbox.banquise.net/jtr/gcc-4.3.2
> http://bigbox.banquise.net/jtr/clang-103935
> http://bigbox.banquise.net/jtr/icc-10.1
>
> This does speak for itself :) The icc does disentangle the whole stuff,
> but is still faster with 3 loops (only 2 in the sample).
I think you need to disentangle the source code rather than leave that
for the compiler. Specifically, I'd remove the "unneeded" MD5_PARA_DO
loops. Instead, I'd define macros around primitives such as xor, which
would perform the required number of instances of the operation. They
would use constants for the array indices - or, if that does not work
well enough, even use individual local variables instead of array
elements. This is more similar to what I have in MD5_std.c, where I use
separate local variables for the two instances of MD5:
MD5_word a0, b0 = Cb, c0 = Cc, d0;
MD5_word a1, b1, c1, d1;
MD5_word u, v;
I understand that you like to be able to easily adjust the number of
instances that you mix, but you'll have to achieve that by defining your
xor, etc. macros differently for common instance counts (say, 2 vs. 3).
> Do you mind giving bench of your SSE code with ICC ?
Sorry, I have no time to dive into this now. I have too many other
tasks in my queue.
> Or just share it so that I could try it :)
I've attached a dirty patch. IIRC, this code is in a state suitable for
the Sun Studio compiler. You'll likely need to change the initializer
for "ones" (a trivial change) to get this to compile with gcc again.
DES_BS_VECTOR34 enables 192-bit vectors with 256-bit alignment (there
are two kinds of them - SSE2+MMX or SSE2+native). With other settings,
you can do pure 128-bit SSE2 vectors and two kinds of 256-bit vectors
(dual SSE2 or SSE2+MMX+native).
In my experiments, I was using new DES S-box expressions (these are in
the works), but I reverted to "plain" nonstd.c for generating this
patch. You may choose to have the code use sboxes.c instead (change
DES_BS from 1 to 2 in x86-64.h), which might better match the target
instruction set (mostly if you use x86-64 native instructions).
A known problem is that the code violates C strict aliasing rules with
its use of typecasts. Yet this did not cause anything worse than
compiler warnings in my testing. A fix for this may be to use unions.
I think I should roll similar changes into the official JtR even if
they'd be no-ops in the default build - to make it easier to conduct
experiments like this.
Alexander
diff -urpN john-1.7.5.orig/src/DES_bs.c john-1.7.5-des-intrinsics/src/DES_bs.c
--- john-1.7.5.orig/src/DES_bs.c 2010-01-16 17:07:54 +0000
+++ john-1.7.5-des-intrinsics/src/DES_bs.c 2010-05-03 10:21:23 +0000
@@ -11,6 +11,12 @@
#include "DES_bs.h"
#if DES_BS_VECTOR
+
+#ifdef DES_BS_VECTOR34
+#undef DES_BS_VECTOR
+#define DES_BS_VECTOR 3
+#endif
+
#define DEPTH [depth]
#define START [0]
#define init_depth() \
@@ -109,7 +115,7 @@ void DES_bs_init(int LM)
/* Convert to byte offsets */
for (index = 0; index < 0x100; index++)
- DES_bs_all.s1[index] *= DES_BS_DEPTH >> 3;
+ DES_bs_all.s1[index] *= sizeof(DES_bs_vector);
if (LM) {
for (c = 0; c < 0x100; c++)
diff -urpN john-1.7.5.orig/src/DES_bs.h john-1.7.5-des-intrinsics/src/DES_bs.h
--- john-1.7.5.orig/src/DES_bs.h 2005-04-17 19:25:32 +0000
+++ john-1.7.5-des-intrinsics/src/DES_bs.h 2010-05-03 10:09:33 +0000
@@ -16,7 +16,9 @@
#define DES_BS_ALGORITHM_NAME ARCH_BITS_STR "/" ARCH_BITS_STR " BS"
#endif
-#if DES_BS_VECTOR
+#ifdef DES_BS_VECTOR34
+#define DES_BS_DEPTH (ARCH_BITS * 3)
+#elif DES_BS_VECTOR
#define DES_BS_DEPTH (ARCH_BITS * DES_BS_VECTOR)
#else
#define DES_BS_DEPTH ARCH_BITS
diff -urpN john-1.7.5.orig/src/DES_bs_b.c john-1.7.5-des-intrinsics/src/DES_bs_b.c
--- john-1.7.5.orig/src/DES_bs_b.c 2003-10-10 03:42:11 +0000
+++ john-1.7.5-des-intrinsics/src/DES_bs_b.c 2010-05-03 11:44:20 +0000
@@ -8,6 +8,178 @@
#if !DES_BS_ASM
#include "DES_bs.h"
+// typedef ARCH_WORD vtype;
+
+#include <emmintrin.h>
+#include <mmintrin.h>
+
+#ifdef DES_BS_VECTOR34
+
+// #define MM
+
+#ifdef MM
+typedef struct {
+ __m128i f;
+ __m64 g;
+} vtype;
+
+#define vst(src, dst) *(vtype *)&(dst) = (src)
+
+static vtype ones = {{-1, -1}, {-1}};
+
+#define vnot(dst, a) \
+ dst.f = _mm_xor_si128(ones.f, a.f); \
+ dst.g = _mm_xor_si64(ones.g, a.g)
+#define vand(dst, a, b) \
+ dst.f = _mm_and_si128(a.f, b.f); \
+ dst.g = _mm_and_si64(a.g, b.g)
+#define vor(dst, a, b) \
+ dst.f = _mm_or_si128(a.f, b.f); \
+ dst.g = _mm_or_si64(a.g, b.g)
+#define vxor(dst, a, b) \
+ (dst).f = _mm_xor_si128((a).f, (b).f); \
+ (dst).g = _mm_xor_si64((a).g, (b).g)
+#define vandn(dst, a, b) \
+ dst.f = _mm_andnot_si128(b.f, a.f); \
+ dst.g = _mm_andnot_si64(b.g, a.g)
+#define vxorn(dst, a, b) \
+ dst.f = _mm_xor_si128(ones.f, _mm_xor_si128((a).f, (b).f)); \
+ dst.g = _mm_xor_si64(ones.g, _mm_xor_si64((a).g, (b).g))
+#else
+typedef struct {
+ __m128i f;
+ long h;
+} vtype;
+
+#define vst(src, dst) *(vtype *)&(dst) = (src)
+
+static vtype ones = {{-1, -1}, -1};
+
+#define vnot(dst, a) \
+ dst.f = _mm_xor_si128(ones.f, a.f); \
+ dst.h = ~a.h;
+#define vand(dst, a, b) \
+ dst.f = _mm_and_si128(a.f, b.f); \
+ dst.h = a.h & b.h;
+#define vor(dst, a, b) \
+ dst.f = _mm_or_si128(a.f, b.f); \
+ dst.h = a.h | b.h;
+#define vxor(dst, a, b) \
+ (dst).f = _mm_xor_si128((a).f, (b).f); \
+ (dst).h = (a).h ^ (b).h;
+#define vandn(dst, a, b) \
+ dst.f = _mm_andnot_si128(b.f, a.f); \
+ dst.h = a.h & ~b.h;
+#define vxorn(dst, a, b) \
+ dst.f = _mm_xor_si128(ones.f, _mm_xor_si128((a).f, (b).f)); \
+ dst.h = ~(a.h ^ b.h);
+#endif
+
+#elif DES_BS_VECTOR == 4
+
+// #define MIX
+
+#ifdef MIX
+typedef struct {
+ __m128i f;
+ __m64 g;
+ long h;
+} vtype;
+
+#define vst(src, dst) *(vtype *)&(dst) = (src)
+
+static vtype ones = {{-1, -1}, {-1, -1}, -1};
+
+#define vnot(dst, a) \
+ dst.f = _mm_xor_si128(ones.f, a.f); \
+ dst.g = _mm_xor_si64(ones.g, a.g); \
+ dst.h = ~a.h;
+#define vand(dst, a, b) \
+ dst.f = _mm_and_si128(a.f, b.f); \
+ dst.g = _mm_and_si64(a.g, b.g); \
+ dst.h = a.h & b.h;
+#define vor(dst, a, b) \
+ dst.f = _mm_or_si128(a.f, b.f); \
+ dst.g = _mm_or_si64(a.g, b.g); \
+ dst.h = a.h | b.h;
+#define vxor(dst, a, b) \
+ (dst).f = _mm_xor_si128((a).f, (b).f); \
+ (dst).g = _mm_xor_si64((a).g, (b).g); \
+ (dst).h = (a).h ^ (b).h;
+#define vandn(dst, a, b) \
+ dst.f = _mm_andnot_si128(b.f, a.f); \
+ dst.g = _mm_andnot_si64(b.g, a.g); \
+ dst.h = a.h & ~b.h;
+#define vxorn(dst, a, b) \
+ dst.f = _mm_xor_si128(ones.f, _mm_xor_si128((a).f, (b).f)); \
+ dst.g = _mm_xor_si64(ones.g, _mm_xor_si64((a).g, (b).g)); \
+ dst.h = ~(a.h ^ b.h);
+#else
+typedef struct {
+ __m128i f;
+ __m128i g;
+} vtype;
+
+#define vst(src, dst) *(vtype *)&(dst) = (src)
+
+static vtype ones = {{-1, -1}, {-1, -1}};
+
+#define vnot(dst, a) \
+ dst.f = _mm_xor_si128(ones.f, a.f); \
+ dst.g = _mm_xor_si128(ones.g, a.g);
+#define vand(dst, a, b) \
+ dst.f = _mm_and_si128(a.f, b.f); \
+ dst.g = _mm_and_si128(a.g, b.g);
+#define vor(dst, a, b) \
+ dst.f = _mm_or_si128(a.f, b.f); \
+ dst.g = _mm_or_si128(a.g, b.g);
+#define vxor(dst, a, b) \
+ (dst).f = _mm_xor_si128((a).f, (b).f); \
+ (dst).g = _mm_xor_si128((a).g, (b).g);
+#define vandn(dst, a, b) \
+ dst.f = _mm_andnot_si128(b.f, a.f); \
+ dst.g = _mm_andnot_si128(b.g, a.g);
+#define vxorn(dst, a, b) \
+ dst.f = _mm_xor_si128(ones.f, _mm_xor_si128((a).f, (b).f)); \
+ dst.g = _mm_xor_si128(ones.g, _mm_xor_si128((a).g, (b).g));
+#endif
+
+#else
+
+// typedef int vtype __attribute__ ((vector_size (16)));
+typedef __m128i vtype;
+
+#define vst(src, dst) *(vtype *)&(dst) = (src)
+
+#if 0
+#define vnot(a) (~(a))
+#define vand(a, b) ((a) & (b))
+#define vor(a, b) ((a) | (b))
+#define vxor(a, b) ((a) ^ (b))
+#define vandn(a, b) ((a) & ~(b))
+#define vxorn(a, b) (~((a) ^ (b)))
+#else
+static vtype ones = {-1, -1};
+
+#define vnot(dst, a) \
+ dst = _mm_xor_si128(ones, a)
+#define vand(dst, a, b) \
+ dst = _mm_and_si128(a, b)
+#define vor(dst, a, b) \
+ dst = _mm_or_si128(a, b)
+#define vxor(dst, a, b) \
+ dst = _mm_xor_si128(a, b)
+#define vandn(dst, a, b) \
+ dst = _mm_andnot_si128(b, a)
+#define vxorn(dst, a, b) \
+ dst = _mm_xor_si128(ones, _mm_xor_si128(a, b))
+#endif
+
+#endif
+
+#undef DES_BS_VECTOR
+#define DES_BS_VECTOR 0
+
/* Include the S-boxes here, so that the compiler can inline them */
#if DES_BS == 2
#include "DES_bs_s.c"
@@ -37,17 +209,21 @@
#define DES_bs_clear_block_8(i) \
for_each_depth() { \
- b[i] bd = 0; \
- b[i + 1] bd = 0; \
- b[i + 2] bd = 0; \
- b[i + 3] bd = 0; \
- b[i + 4] bd = 0; \
- b[i + 5] bd = 0; \
- b[i + 6] bd = 0; \
- b[i + 7] bd = 0; \
+ vst(zero, b[i] bd); \
+ vst(zero, b[i + 1] bd); \
+ vst(zero, b[i + 2] bd); \
+ vst(zero, b[i + 3] bd); \
+ vst(zero, b[i + 4] bd); \
+ vst(zero, b[i + 5] bd); \
+ vst(zero, b[i + 6] bd); \
+ vst(zero, b[i + 7] bd); \
}
#define DES_bs_clear_block() \
+{ \
+ vtype zero; \
+/* This may produce an "uninitialized" warning */ \
+ vxor(zero, zero, zero); \
DES_bs_clear_block_8(0); \
DES_bs_clear_block_8(8); \
DES_bs_clear_block_8(16); \
@@ -55,7 +231,24 @@
DES_bs_clear_block_8(32); \
DES_bs_clear_block_8(40); \
DES_bs_clear_block_8(48); \
- DES_bs_clear_block_8(56);
+ DES_bs_clear_block_8(56); \
+}
+
+#define DES_bs_set_block_8(i, v0, v1, v2, v3, v4, v5, v6, v7) \
+ for_each_depth() { \
+ vst(v0, b[i] bd); \
+ vst(v1, b[i + 1] bd); \
+ vst(v2, b[i + 2] bd); \
+ vst(v3, b[i + 3] bd); \
+ vst(v4, b[i + 4] bd); \
+ vst(v5, b[i + 5] bd); \
+ vst(v6, b[i + 6] bd); \
+ vst(v7, b[i + 7] bd); \
+ }
+
+#define x(p, q) ({ vtype t; vxor(t, *(vtype *)&e[p] ed, *(vtype *)&k[q] kd); t; })
+#define y(p, q) ({ vtype t; vxor(t, *(vtype *)&b[p] bd, *(vtype *)&k[q] kd); t; })
+#define z(r) ((vtype *)&b[r] bd)
void DES_bs_crypt(int count)
{
@@ -81,73 +274,73 @@ void DES_bs_crypt(int count)
start:
for_each_depth()
- s1(e[0] ed ^ k[0] kd, e[1] ed ^ k[1] kd, e[2] ed ^ k[2] kd,
- e[3] ed ^ k[3] kd, e[4] ed ^ k[4] kd, e[5] ed ^ k[5] kd,
- &b[40] bd, &b[48] bd, &b[54] bd, &b[62] bd);
- for_each_depth()
- s2(e[6] ed ^ k[6] kd, e[7] ed ^ k[7] kd, e[8] ed ^ k[8] kd,
- e[9] ed ^ k[9] kd, e[10] ed ^ k[10] kd, e[11] ed ^ k[11] kd,
- &b[44] bd, &b[59] bd, &b[33] bd, &b[49] bd);
- for_each_depth()
- s3(e[12] ed ^ k[12] kd, e[13] ed ^ k[13] kd, e[14] ed ^ k[14] kd,
- e[15] ed ^ k[15] kd, e[16] ed ^ k[16] kd, e[17] ed ^ k[17] kd,
- &b[55] bd, &b[47] bd, &b[61] bd, &b[37] bd);
- for_each_depth()
- s4(e[18] ed ^ k[18] kd, e[19] ed ^ k[19] kd, e[20] ed ^ k[20] kd,
- e[21] ed ^ k[21] kd, e[22] ed ^ k[22] kd, e[23] ed ^ k[23] kd,
- &b[57] bd, &b[51] bd, &b[41] bd, &b[32] bd);
- for_each_depth()
- s5(e[24] ed ^ k[24] kd, e[25] ed ^ k[25] kd, e[26] ed ^ k[26] kd,
- e[27] ed ^ k[27] kd, e[28] ed ^ k[28] kd, e[29] ed ^ k[29] kd,
- &b[39] bd, &b[45] bd, &b[56] bd, &b[34] bd);
- for_each_depth()
- s6(e[30] ed ^ k[30] kd, e[31] ed ^ k[31] kd, e[32] ed ^ k[32] kd,
- e[33] ed ^ k[33] kd, e[34] ed ^ k[34] kd, e[35] ed ^ k[35] kd,
- &b[35] bd, &b[60] bd, &b[42] bd, &b[50] bd);
- for_each_depth()
- s7(e[36] ed ^ k[36] kd, e[37] ed ^ k[37] kd, e[38] ed ^ k[38] kd,
- e[39] ed ^ k[39] kd, e[40] ed ^ k[40] kd, e[41] ed ^ k[41] kd,
- &b[63] bd, &b[43] bd, &b[53] bd, &b[38] bd);
- for_each_depth()
- s8(e[42] ed ^ k[42] kd, e[43] ed ^ k[43] kd, e[44] ed ^ k[44] kd,
- e[45] ed ^ k[45] kd, e[46] ed ^ k[46] kd, e[47] ed ^ k[47] kd,
- &b[36] bd, &b[58] bd, &b[46] bd, &b[52] bd);
+ s1(x(0, 0), x(1, 1), x(2, 2),
+ x(3, 3), x(4, 4), x(5, 5),
+ z(40), z(48), z(54), z(62));
+ for_each_depth()
+ s2(x(6, 6), x(7, 7), x(8, 8),
+ x(9, 9), x(10, 10), x(11, 11),
+ z(44), z(59), z(33), z(49));
+ for_each_depth()
+ s3(x(12, 12), x(13, 13), x(14, 14),
+ x(15, 15), x(16, 16), x(17, 17),
+ z(55), z(47), z(61), z(37));
+ for_each_depth()
+ s4(x(18, 18), x(19, 19), x(20, 20),
+ x(21, 21), x(22, 22), x(23, 23),
+ z(57), z(51), z(41), z(32));
+ for_each_depth()
+ s5(x(24, 24), x(25, 25), x(26, 26),
+ x(27, 27), x(28, 28), x(29, 29),
+ z(39), z(45), z(56), z(34));
+ for_each_depth()
+ s6(x(30, 30), x(31, 31), x(32, 32),
+ x(33, 33), x(34, 34), x(35, 35),
+ z(35), z(60), z(42), z(50));
+ for_each_depth()
+ s7(x(36, 36), x(37, 37), x(38, 38),
+ x(39, 39), x(40, 40), x(41, 41),
+ z(63), z(43), z(53), z(38));
+ for_each_depth()
+ s8(x(42, 42), x(43, 43), x(44, 44),
+ x(45, 45), x(46, 46), x(47, 47),
+ z(36), z(58), z(46), z(52));
if (rounds_and_swapped == 0x100) goto next;
swap:
for_each_depth()
- s1(e[48] ed ^ k[48] kd, e[49] ed ^ k[49] kd, e[50] ed ^ k[50] kd,
- e[51] ed ^ k[51] kd, e[52] ed ^ k[52] kd, e[53] ed ^ k[53] kd,
- &b[8] bd, &b[16] bd, &b[22] bd, &b[30] bd);
- for_each_depth()
- s2(e[54] ed ^ k[54] kd, e[55] ed ^ k[55] kd, e[56] ed ^ k[56] kd,
- e[57] ed ^ k[57] kd, e[58] ed ^ k[58] kd, e[59] ed ^ k[59] kd,
- &b[12] bd, &b[27] bd, &b[1] bd, &b[17] bd);
- for_each_depth()
- s3(e[60] ed ^ k[60] kd, e[61] ed ^ k[61] kd, e[62] ed ^ k[62] kd,
- e[63] ed ^ k[63] kd, e[64] ed ^ k[64] kd, e[65] ed ^ k[65] kd,
- &b[23] bd, &b[15] bd, &b[29] bd, &b[5] bd);
- for_each_depth()
- s4(e[66] ed ^ k[66] kd, e[67] ed ^ k[67] kd, e[68] ed ^ k[68] kd,
- e[69] ed ^ k[69] kd, e[70] ed ^ k[70] kd, e[71] ed ^ k[71] kd,
- &b[25] bd, &b[19] bd, &b[9] bd, &b[0] bd);
- for_each_depth()
- s5(e[72] ed ^ k[72] kd, e[73] ed ^ k[73] kd, e[74] ed ^ k[74] kd,
- e[75] ed ^ k[75] kd, e[76] ed ^ k[76] kd, e[77] ed ^ k[77] kd,
- &b[7] bd, &b[13] bd, &b[24] bd, &b[2] bd);
- for_each_depth()
- s6(e[78] ed ^ k[78] kd, e[79] ed ^ k[79] kd, e[80] ed ^ k[80] kd,
- e[81] ed ^ k[81] kd, e[82] ed ^ k[82] kd, e[83] ed ^ k[83] kd,
- &b[3] bd, &b[28] bd, &b[10] bd, &b[18] bd);
- for_each_depth()
- s7(e[84] ed ^ k[84] kd, e[85] ed ^ k[85] kd, e[86] ed ^ k[86] kd,
- e[87] ed ^ k[87] kd, e[88] ed ^ k[88] kd, e[89] ed ^ k[89] kd,
- &b[31] bd, &b[11] bd, &b[21] bd, &b[6] bd);
- for_each_depth()
- s8(e[90] ed ^ k[90] kd, e[91] ed ^ k[91] kd, e[92] ed ^ k[92] kd,
- e[93] ed ^ k[93] kd, e[94] ed ^ k[94] kd, e[95] ed ^ k[95] kd,
- &b[4] bd, &b[26] bd, &b[14] bd, &b[20] bd);
+ s1(x(48, 48), x(49, 49), x(50, 50),
+ x(51, 51), x(52, 52), x(53, 53),
+ z(8), z(16), z(22), z(30));
+ for_each_depth()
+ s2(x(54, 54), x(55, 55), x(56, 56),
+ x(57, 57), x(58, 58), x(59, 59),
+ z(12), z(27), z(1), z(17));
+ for_each_depth()
+ s3(x(60, 60), x(61, 61), x(62, 62),
+ x(63, 63), x(64, 64), x(65, 65),
+ z(23), z(15), z(29), z(5));
+ for_each_depth()
+ s4(x(66, 66), x(67, 67), x(68, 68),
+ x(69, 69), x(70, 70), x(71, 71),
+ z(25), z(19), z(9), z(0));
+ for_each_depth()
+ s5(x(72, 72), x(73, 73), x(74, 74),
+ x(75, 75), x(76, 76), x(77, 77),
+ z(7), z(13), z(24), z(2));
+ for_each_depth()
+ s6(x(78, 78), x(79, 79), x(80, 80),
+ x(81, 81), x(82, 82), x(83, 83),
+ z(3), z(28), z(10), z(18));
+ for_each_depth()
+ s7(x(84, 84), x(85, 85), x(86, 86),
+ x(87, 87), x(88, 88), x(89, 89),
+ z(31), z(11), z(21), z(6));
+ for_each_depth()
+ s8(x(90, 90), x(91, 91), x(92, 92),
+ x(93, 93), x(94, 94), x(95, 95),
+ z(4), z(26), z(14), z(20));
k += 96;
@@ -187,73 +380,73 @@ void DES_bs_crypt_25(void)
start:
for_each_depth()
- s1(e[0] ed ^ k[0] kd, e[1] ed ^ k[1] kd, e[2] ed ^ k[2] kd,
- e[3] ed ^ k[3] kd, e[4] ed ^ k[4] kd, e[5] ed ^ k[5] kd,
- &b[40] bd, &b[48] bd, &b[54] bd, &b[62] bd);
- for_each_depth()
- s2(e[6] ed ^ k[6] kd, e[7] ed ^ k[7] kd, e[8] ed ^ k[8] kd,
- e[9] ed ^ k[9] kd, e[10] ed ^ k[10] kd, e[11] ed ^ k[11] kd,
- &b[44] bd, &b[59] bd, &b[33] bd, &b[49] bd);
- for_each_depth()
- s3(b[7] bd ^ k[12] kd, b[8] bd ^ k[13] kd, b[9] bd ^ k[14] kd,
- b[10] bd ^ k[15] kd, b[11] bd ^ k[16] kd, b[12] bd ^ k[17] kd,
- &b[55] bd, &b[47] bd, &b[61] bd, &b[37] bd);
- for_each_depth()
- s4(b[11] bd ^ k[18] kd, b[12] bd ^ k[19] kd, b[13] bd ^ k[20] kd,
- b[14] bd ^ k[21] kd, b[15] bd ^ k[22] kd, b[16] bd ^ k[23] kd,
- &b[57] bd, &b[51] bd, &b[41] bd, &b[32] bd);
- for_each_depth()
- s5(e[24] ed ^ k[24] kd, e[25] ed ^ k[25] kd, e[26] ed ^ k[26] kd,
- e[27] ed ^ k[27] kd, e[28] ed ^ k[28] kd, e[29] ed ^ k[29] kd,
- &b[39] bd, &b[45] bd, &b[56] bd, &b[34] bd);
- for_each_depth()
- s6(e[30] ed ^ k[30] kd, e[31] ed ^ k[31] kd, e[32] ed ^ k[32] kd,
- e[33] ed ^ k[33] kd, e[34] ed ^ k[34] kd, e[35] ed ^ k[35] kd,
- &b[35] bd, &b[60] bd, &b[42] bd, &b[50] bd);
- for_each_depth()
- s7(b[23] bd ^ k[36] kd, b[24] bd ^ k[37] kd, b[25] bd ^ k[38] kd,
- b[26] bd ^ k[39] kd, b[27] bd ^ k[40] kd, b[28] bd ^ k[41] kd,
- &b[63] bd, &b[43] bd, &b[53] bd, &b[38] bd);
- for_each_depth()
- s8(b[27] bd ^ k[42] kd, b[28] bd ^ k[43] kd, b[29] bd ^ k[44] kd,
- b[30] bd ^ k[45] kd, b[31] bd ^ k[46] kd, b[0] bd ^ k[47] kd,
- &b[36] bd, &b[58] bd, &b[46] bd, &b[52] bd);
+ s1(x(0, 0), x(1, 1), x(2, 2),
+ x(3, 3), x(4, 4), x(5, 5),
+ z(40), z(48), z(54), z(62));
+ for_each_depth()
+ s2(x(6, 6), x(7, 7), x(8, 8),
+ x(9, 9), x(10, 10), x(11, 11),
+ z(44), z(59), z(33), z(49));
+ for_each_depth()
+ s3(y(7, 12), y(8, 13), y(9, 14),
+ y(10, 15), y(11, 16), y(12, 17),
+ z(55), z(47), z(61), z(37));
+ for_each_depth()
+ s4(y(11, 18), y(12, 19), y(13, 20),
+ y(14, 21), y(15, 22), y(16, 23),
+ z(57), z(51), z(41), z(32));
+ for_each_depth()
+ s5(x(24, 24), x(25, 25), x(26, 26),
+ x(27, 27), x(28, 28), x(29, 29),
+ z(39), z(45), z(56), z(34));
+ for_each_depth()
+ s6(x(30, 30), x(31, 31), x(32, 32),
+ x(33, 33), x(34, 34), x(35, 35),
+ z(35), z(60), z(42), z(50));
+ for_each_depth()
+ s7(y(23, 36), y(24, 37), y(25, 38),
+ y(26, 39), y(27, 40), y(28, 41),
+ z(63), z(43), z(53), z(38));
+ for_each_depth()
+ s8(y(27, 42), y(28, 43), y(29, 44),
+ y(30, 45), y(31, 46), y(0, 47),
+ z(36), z(58), z(46), z(52));
if (rounds_and_swapped == 0x100) goto next;
swap:
for_each_depth()
- s1(e[48] ed ^ k[48] kd, e[49] ed ^ k[49] kd, e[50] ed ^ k[50] kd,
- e[51] ed ^ k[51] kd, e[52] ed ^ k[52] kd, e[53] ed ^ k[53] kd,
- &b[8] bd, &b[16] bd, &b[22] bd, &b[30] bd);
- for_each_depth()
- s2(e[54] ed ^ k[54] kd, e[55] ed ^ k[55] kd, e[56] ed ^ k[56] kd,
- e[57] ed ^ k[57] kd, e[58] ed ^ k[58] kd, e[59] ed ^ k[59] kd,
- &b[12] bd, &b[27] bd, &b[1] bd, &b[17] bd);
- for_each_depth()
- s3(b[39] bd ^ k[60] kd, b[40] bd ^ k[61] kd, b[41] bd ^ k[62] kd,
- b[42] bd ^ k[63] kd, b[43] bd ^ k[64] kd, b[44] bd ^ k[65] kd,
- &b[23] bd, &b[15] bd, &b[29] bd, &b[5] bd);
- for_each_depth()
- s4(b[43] bd ^ k[66] kd, b[44] bd ^ k[67] kd, b[45] bd ^ k[68] kd,
- b[46] bd ^ k[69] kd, b[47] bd ^ k[70] kd, b[48] bd ^ k[71] kd,
- &b[25] bd, &b[19] bd, &b[9] bd, &b[0] bd);
- for_each_depth()
- s5(e[72] ed ^ k[72] kd, e[73] ed ^ k[73] kd, e[74] ed ^ k[74] kd,
- e[75] ed ^ k[75] kd, e[76] ed ^ k[76] kd, e[77] ed ^ k[77] kd,
- &b[7] bd, &b[13] bd, &b[24] bd, &b[2] bd);
- for_each_depth()
- s6(e[78] ed ^ k[78] kd, e[79] ed ^ k[79] kd, e[80] ed ^ k[80] kd,
- e[81] ed ^ k[81] kd, e[82] ed ^ k[82] kd, e[83] ed ^ k[83] kd,
- &b[3] bd, &b[28] bd, &b[10] bd, &b[18] bd);
- for_each_depth()
- s7(b[55] bd ^ k[84] kd, b[56] bd ^ k[85] kd, b[57] bd ^ k[86] kd,
- b[58] bd ^ k[87] kd, b[59] bd ^ k[88] kd, b[60] bd ^ k[89] kd,
- &b[31] bd, &b[11] bd, &b[21] bd, &b[6] bd);
- for_each_depth()
- s8(b[59] bd ^ k[90] kd, b[60] bd ^ k[91] kd, b[61] bd ^ k[92] kd,
- b[62] bd ^ k[93] kd, b[63] bd ^ k[94] kd, b[32] bd ^ k[95] kd,
- &b[4] bd, &b[26] bd, &b[14] bd, &b[20] bd);
+ s1(x(48, 48), x(49, 49), x(50, 50),
+ x(51, 51), x(52, 52), x(53, 53),
+ z(8), z(16), z(22), z(30));
+ for_each_depth()
+ s2(x(54, 54), x(55, 55), x(56, 56),
+ x(57, 57), x(58, 58), x(59, 59),
+ z(12), z(27), z(1), z(17));
+ for_each_depth()
+ s3(y(39, 60), y(40, 61), y(41, 62),
+ y(42, 63), y(43, 64), y(44, 65),
+ z(23), z(15), z(29), z(5));
+ for_each_depth()
+ s4(y(43, 66), y(44, 67), y(45, 68),
+ y(46, 69), y(47, 70), y(48, 71),
+ z(25), z(19), z(9), z(0));
+ for_each_depth()
+ s5(x(72, 72), x(73, 73), x(74, 74),
+ x(75, 75), x(76, 76), x(77, 77),
+ z(7), z(13), z(24), z(2));
+ for_each_depth()
+ s6(x(78, 78), x(79, 79), x(80, 80),
+ x(81, 81), x(82, 82), x(83, 83),
+ z(3), z(28), z(10), z(18));
+ for_each_depth()
+ s7(y(55, 84), y(56, 85), y(57, 86),
+ y(58, 87), y(59, 88), y(60, 89),
+ z(31), z(11), z(21), z(6));
+ for_each_depth()
+ s8(y(59, 90), y(60, 91), y(61, 92),
+ y(62, 93), y(63, 94), y(32, 95),
+ z(4), z(26), z(14), z(20));
k += 96;
@@ -270,6 +463,8 @@ next:
goto start;
}
+#undef x
+
#undef kd
#if DES_BS_VECTOR
#define kd [depth]
@@ -279,164 +474,109 @@ next:
void DES_bs_crypt_LM(void)
{
+ vtype zero;
ARCH_WORD **k;
int rounds;
#if DES_BS_VECTOR
int depth;
#endif
- for_each_depth() {
- b[0] bd = 0;
- b[1] bd = 0;
- b[2] bd = 0;
- b[3] bd = 0;
- b[4] bd = 0;
- b[5] bd = 0;
- b[6] bd = 0;
- b[7] bd = 0;
- b[8] bd = ~(ARCH_WORD)0;
- b[9] bd = ~(ARCH_WORD)0;
- b[10] bd = ~(ARCH_WORD)0;
- b[11] bd = 0;
- b[12] bd = ~(ARCH_WORD)0;
- b[13] bd = 0;
- b[14] bd = 0;
- b[15] bd = 0;
- b[16] bd = 0;
- b[17] bd = 0;
- b[18] bd = 0;
- b[19] bd = 0;
- b[20] bd = 0;
- b[21] bd = 0;
- b[22] bd = 0;
- b[23] bd = ~(ARCH_WORD)0;
- b[24] bd = 0;
- b[25] bd = 0;
- b[26] bd = ~(ARCH_WORD)0;
- b[27] bd = 0;
- b[28] bd = 0;
- b[29] bd = ~(ARCH_WORD)0;
- b[30] bd = ~(ARCH_WORD)0;
- b[31] bd = ~(ARCH_WORD)0;
- b[32] bd = 0;
- b[33] bd = 0;
- b[34] bd = 0;
- b[35] bd = ~(ARCH_WORD)0;
- b[36] bd = 0;
- b[37] bd = ~(ARCH_WORD)0;
- b[38] bd = ~(ARCH_WORD)0;
- b[39] bd = ~(ARCH_WORD)0;
- b[40] bd = 0;
- b[41] bd = 0;
- b[42] bd = 0;
- b[43] bd = 0;
- b[44] bd = 0;
- b[45] bd = ~(ARCH_WORD)0;
- b[46] bd = 0;
- b[47] bd = 0;
- b[48] bd = ~(ARCH_WORD)0;
- b[49] bd = ~(ARCH_WORD)0;
- b[50] bd = 0;
- b[51] bd = 0;
- b[52] bd = 0;
- b[53] bd = 0;
- b[54] bd = ~(ARCH_WORD)0;
- b[55] bd = 0;
- b[56] bd = ~(ARCH_WORD)0;
- b[57] bd = 0;
- b[58] bd = ~(ARCH_WORD)0;
- b[59] bd = 0;
- b[60] bd = ~(ARCH_WORD)0;
- b[61] bd = ~(ARCH_WORD)0;
- b[62] bd = ~(ARCH_WORD)0;
- b[63] bd = ~(ARCH_WORD)0;
- }
+/* This may produce an "uninitialized" warning */
+ vxor(zero, zero, zero);
+ DES_bs_set_block_8(0, zero, zero, zero, zero, zero, zero, zero, zero);
+ DES_bs_set_block_8(8, ones, ones, ones, zero, ones, zero, zero, zero);
+ DES_bs_set_block_8(16, zero, zero, zero, zero, zero, zero, zero, ones);
+ DES_bs_set_block_8(24, zero, zero, ones, zero, zero, ones, ones, ones);
+ DES_bs_set_block_8(32, zero, zero, zero, ones, zero, ones, ones, ones);
+ DES_bs_set_block_8(40, zero, zero, zero, zero, zero, ones, zero, zero);
+ DES_bs_set_block_8(48, ones, ones, zero, zero, zero, zero, ones, zero);
+ DES_bs_set_block_8(56, ones, zero, ones, zero, ones, ones, ones, ones);
k = DES_bs_all.KS.p;
rounds = 8;
do {
for_each_depth()
- s1(b[31] bd ^ k[0] kd, b[0] bd ^ k[1] kd,
- b[1] bd ^ k[2] kd, b[2] bd ^ k[3] kd,
- b[3] bd ^ k[4] kd, b[4] bd ^ k[5] kd,
- &b[40] bd, &b[48] bd, &b[54] bd, &b[62] bd);
- for_each_depth()
- s2(b[3] bd ^ k[6] kd, b[4] bd ^ k[7] kd,
- b[5] bd ^ k[8] kd, b[6] bd ^ k[9] kd,
- b[7] bd ^ k[10] kd, b[8] bd ^ k[11] kd,
- &b[44] bd, &b[59] bd, &b[33] bd, &b[49] bd);
- for_each_depth()
- s3(b[7] bd ^ k[12] kd, b[8] bd ^ k[13] kd,
- b[9] bd ^ k[14] kd, b[10] bd ^ k[15] kd,
- b[11] bd ^ k[16] kd, b[12] bd ^ k[17] kd,
- &b[55] bd, &b[47] bd, &b[61] bd, &b[37] bd);
- for_each_depth()
- s4(b[11] bd ^ k[18] kd, b[12] bd ^ k[19] kd,
- b[13] bd ^ k[20] kd, b[14] bd ^ k[21] kd,
- b[15] bd ^ k[22] kd, b[16] bd ^ k[23] kd,
- &b[57] bd, &b[51] bd, &b[41] bd, &b[32] bd);
- for_each_depth()
- s5(b[15] bd ^ k[24] kd, b[16] bd ^ k[25] kd,
- b[17] bd ^ k[26] kd, b[18] bd ^ k[27] kd,
- b[19] bd ^ k[28] kd, b[20] bd ^ k[29] kd,
- &b[39] bd, &b[45] bd, &b[56] bd, &b[34] bd);
- for_each_depth()
- s6(b[19] bd ^ k[30] kd, b[20] bd ^ k[31] kd,
- b[21] bd ^ k[32] kd, b[22] bd ^ k[33] kd,
- b[23] bd ^ k[34] kd, b[24] bd ^ k[35] kd,
- &b[35] bd, &b[60] bd, &b[42] bd, &b[50] bd);
- for_each_depth()
- s7(b[23] bd ^ k[36] kd, b[24] bd ^ k[37] kd,
- b[25] bd ^ k[38] kd, b[26] bd ^ k[39] kd,
- b[27] bd ^ k[40] kd, b[28] bd ^ k[41] kd,
- &b[63] bd, &b[43] bd, &b[53] bd, &b[38] bd);
- for_each_depth()
- s8(b[27] bd ^ k[42] kd, b[28] bd ^ k[43] kd,
- b[29] bd ^ k[44] kd, b[30] bd ^ k[45] kd,
- b[31] bd ^ k[46] kd, b[0] bd ^ k[47] kd,
- &b[36] bd, &b[58] bd, &b[46] bd, &b[52] bd);
-
- for_each_depth()
- s1(b[63] bd ^ k[48] kd, b[32] bd ^ k[49] kd,
- b[33] bd ^ k[50] kd, b[34] bd ^ k[51] kd,
- b[35] bd ^ k[52] kd, b[36] bd ^ k[53] kd,
- &b[8] bd, &b[16] bd, &b[22] bd, &b[30] bd);
- for_each_depth()
- s2(b[35] bd ^ k[54] kd, b[36] bd ^ k[55] kd,
- b[37] bd ^ k[56] kd, b[38] bd ^ k[57] kd,
- b[39] bd ^ k[58] kd, b[40] bd ^ k[59] kd,
- &b[12] bd, &b[27] bd, &b[1] bd, &b[17] bd);
- for_each_depth()
- s3(b[39] bd ^ k[60] kd, b[40] bd ^ k[61] kd,
- b[41] bd ^ k[62] kd, b[42] bd ^ k[63] kd,
- b[43] bd ^ k[64] kd, b[44] bd ^ k[65] kd,
- &b[23] bd, &b[15] bd, &b[29] bd, &b[5] bd);
- for_each_depth()
- s4(b[43] bd ^ k[66] kd, b[44] bd ^ k[67] kd,
- b[45] bd ^ k[68] kd, b[46] bd ^ k[69] kd,
- b[47] bd ^ k[70] kd, b[48] bd ^ k[71] kd,
- &b[25] bd, &b[19] bd, &b[9] bd, &b[0] bd);
- for_each_depth()
- s5(b[47] bd ^ k[72] kd, b[48] bd ^ k[73] kd,
- b[49] bd ^ k[74] kd, b[50] bd ^ k[75] kd,
- b[51] bd ^ k[76] kd, b[52] bd ^ k[77] kd,
- &b[7] bd, &b[13] bd, &b[24] bd, &b[2] bd);
- for_each_depth()
- s6(b[51] bd ^ k[78] kd, b[52] bd ^ k[79] kd,
- b[53] bd ^ k[80] kd, b[54] bd ^ k[81] kd,
- b[55] bd ^ k[82] kd, b[56] bd ^ k[83] kd,
- &b[3] bd, &b[28] bd, &b[10] bd, &b[18] bd);
- for_each_depth()
- s7(b[55] bd ^ k[84] kd, b[56] bd ^ k[85] kd,
- b[57] bd ^ k[86] kd, b[58] bd ^ k[87] kd,
- b[59] bd ^ k[88] kd, b[60] bd ^ k[89] kd,
- &b[31] bd, &b[11] bd, &b[21] bd, &b[6] bd);
- for_each_depth()
- s8(b[59] bd ^ k[90] kd, b[60] bd ^ k[91] kd,
- b[61] bd ^ k[92] kd, b[62] bd ^ k[93] kd,
- b[63] bd ^ k[94] kd, b[32] bd ^ k[95] kd,
- &b[4] bd, &b[26] bd, &b[14] bd, &b[20] bd);
+ s1(y(31, 0), y(0, 1),
+ y(1, 2), y(2, 3),
+ y(3, 4), y(4, 5),
+ z(40), z(48), z(54), z(62));
+ for_each_depth()
+ s2(y(3, 6), y(4, 7),
+ y(5, 8), y(6, 9),
+ y(7, 10), y(8, 11),
+ z(44), z(59), z(33), z(49));
+ for_each_depth()
+ s3(y(7, 12), y(8, 13),
+ y(9, 14), y(10, 15),
+ y(11, 16), y(12, 17),
+ z(55), z(47), z(61), z(37));
+ for_each_depth()
+ s4(y(11, 18), y(12, 19),
+ y(13, 20), y(14, 21),
+ y(15, 22), y(16, 23),
+ z(57), z(51), z(41), z(32));
+ for_each_depth()
+ s5(y(15, 24), y(16, 25),
+ y(17, 26), y(18, 27),
+ y(19, 28), y(20, 29),
+ z(39), z(45), z(56), z(34));
+ for_each_depth()
+ s6(y(19, 30), y(20, 31),
+ y(21, 32), y(22, 33),
+ y(23, 34), y(24, 35),
+ z(35), z(60), z(42), z(50));
+ for_each_depth()
+ s7(y(23, 36), y(24, 37),
+ y(25, 38), y(26, 39),
+ y(27, 40), y(28, 41),
+ z(63), z(43), z(53), z(38));
+ for_each_depth()
+ s8(y(27, 42), y(28, 43),
+ y(29, 44), y(30, 45),
+ y(31, 46), y(0, 47),
+ z(36), z(58), z(46), z(52));
+
+ for_each_depth()
+ s1(y(63, 48), y(32, 49),
+ y(33, 50), y(34, 51),
+ y(35, 52), y(36, 53),
+ z(8), z(16), z(22), z(30));
+ for_each_depth()
+ s2(y(35, 54), y(36, 55),
+ y(37, 56), y(38, 57),
+ y(39, 58), y(40, 59),
+ z(12), z(27), z(1), z(17));
+ for_each_depth()
+ s3(y(39, 60), y(40, 61),
+ y(41, 62), y(42, 63),
+ y(43, 64), y(44, 65),
+ z(23), z(15), z(29), z(5));
+ for_each_depth()
+ s4(y(43, 66), y(44, 67),
+ y(45, 68), y(46, 69),
+ y(47, 70), y(48, 71),
+ z(25), z(19), z(9), z(0));
+ for_each_depth()
+ s5(y(47, 72), y(48, 73),
+ y(49, 74), y(50, 75),
+ y(51, 76), y(52, 77),
+ z(7), z(13), z(24), z(2));
+ for_each_depth()
+ s6(y(51, 78), y(52, 79),
+ y(53, 80), y(54, 81),
+ y(55, 82), y(56, 83),
+ z(3), z(28), z(10), z(18));
+ for_each_depth()
+ s7(y(55, 84), y(56, 85),
+ y(57, 86), y(58, 87),
+ y(59, 88), y(60, 89),
+ z(31), z(11), z(21), z(6));
+ for_each_depth()
+ s8(y(59, 90), y(60, 91),
+ y(61, 92), y(62, 93),
+ y(63, 94), y(32, 95),
+ z(4), z(26), z(14), z(20));
k += 96;
} while (--rounds);
diff -urpN john-1.7.5.orig/src/Makefile john-1.7.5-des-intrinsics/src/Makefile
--- john-1.7.5.orig/src/Makefile 2009-12-17 19:11:03 +0000
+++ john-1.7.5-des-intrinsics/src/Makefile 2010-05-03 11:22:28 +0000
@@ -19,7 +19,8 @@ CFLAGS = -c -Wall -O2 -fomit-frame-point
ASFLAGS = -c
LDFLAGS = -s
OPT_NORMAL = -funroll-loops
-OPT_INLINE = -finline-functions
+#OPT_INLINE = -finline-functions
+OPT_INLINE = -finline-functions -finline-limit=4000 --param inline-unit-growth=2000 --param large-function-growth=2000 -I/usr/local/lib/gcc/i386-pc-solaris2.10/3.4.6/include
JOHN_OBJS_MINIMAL = \
DES_fmt.o DES_std.o DES_bs.o \
@@ -149,7 +150,7 @@ default:
linux-x86-64:
$(LN) x86-64.h arch.h
$(MAKE) $(PROJ) \
- JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o"
+# JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o"
linux-x86-64-32-sse2:
$(LN) x86-sse.h arch.h
@@ -439,21 +440,21 @@ spro-sparc.o: sparc.S
solaris-x86-64-cc:
$(LN) x86-64.h arch.h
$(MAKE) $(PROJ) \
- JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o" \
CC=cc \
CFLAGS="-c -fast -xarch=native64" \
ASFLAGS="-c -xarch=native64" \
LDFLAGS="-s -xarch=native64 -lrt" \
OPT_NORMAL="" \
OPT_INLINE="-xinline=s1,s2,s3,s4,s5,s6,s7,s8"
+# JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o" \
solaris-x86-64-gcc:
$(LN) x86-64.h arch.h
$(MAKE) $(PROJ) \
- JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o" \
CFLAGS="$(CFLAGS) -m64" \
ASFLAGS="$(CFLAGS) -m64" \
LDFLAGS="$(LDFLAGS) -m64 -lrt"
+# JOHN_OBJS="$(JOHN_OBJS_MINIMAL) x86-64.o" \
solaris-x86-sse2-cc:
$(LN) x86-sse.h arch.h
@@ -844,11 +845,13 @@ DES_bs_b.o: DES_bs_b.c DES_bs_s.c DES_bs
$(CC) $(CFLAGS) $(OPT_INLINE) DES_bs_b.c
# I prefer to distribute Matthew Kwan's S-box files unmodified...
-DES_bs_s.c: sboxes.c
- $(SED) "s/unsigned long/ARCH_WORD/" sboxes.c > DES_bs_s.c
-
-DES_bs_n.c: nonstd.c
- $(SED) "s/unsigned long/ARCH_WORD/" nonstd.c > DES_bs_n.c
+DES_bs_s.c: sboxes.c v.pl
+ $(PERL) v.pl sboxes.c > DES_bs_s.c
+# $(SED) "s/unsigned long/ARCH_WORD/" sboxes.c > DES_bs_s.c
+
+DES_bs_n.c: nonstd.c v.pl
+ $(PERL) v.pl nonstd.c > DES_bs_n.c
+# $(SED) "s/unsigned long/ARCH_WORD/" nonstd.c > DES_bs_n.c
DES_bs_a.c: nonstd.c ppc-alti.pl
$(PERL) ppc-alti.pl nonstd.c > DES_bs_a.c
diff -urpN john-1.7.5.orig/src/v.pl john-1.7.5-des-intrinsics/src/v.pl
--- john-1.7.5.orig/src/v.pl 1970-01-01 00:00:00 +0000
+++ john-1.7.5-des-intrinsics/src/v.pl 2010-05-18 22:06:45 +0000
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+#
+# This file is part of John the Ripper password cracker,
+# Copyright (c) 2005,2010 by Solar Designer
+#
+
+%vec_ops = ("&", "and", "|", "or", "^", "xor");
+
+while (<>) {
+ s/unsigned long/vtype/;
+ ($r, $a, $op, $n, $b) =
+ /^\t*([\w\d]+) *= *([\w\d]+) *([&|^]) *\(*(~*)([\w\d]+)\)*;$/;
+ if (!$r) {
+ ($a, $op, $b) = /^\t*(\*[\w\d]+) *(\^)= *([\w\d]+);$/;
+ $r = $a;
+ undef $n;
+ }
+ if (!$r) {
+ ($r, $n, $a) = /^\t*([\w\d]+) *= *(~*)([\w\d]+);$/;
+ }
+ $op = $vec_ops{$op};
+ if ($n && !$op) {
+ $_ = "\tvnot($r, $a);\n"
+ } elsif ($n && $op eq "and") {
+ $op = "andn";
+ } elsif ($n && $op eq "xor") {
+ $op = "xorn";
+ } elsif ($n) {
+ die;
+ }
+ $_ = "\tv" . $op . "($r, $a, $b);\n"
+ if ($r && $a && $op && $b);
+ print;
+}
diff -urpN john-1.7.5.orig/src/x86-64.h john-1.7.5-des-intrinsics/src/x86-64.h
--- john-1.7.5.orig/src/x86-64.h 2008-06-22 01:29:02 +0000
+++ john-1.7.5-des-intrinsics/src/x86-64.h 2010-05-03 11:36:31 +0000
@@ -32,11 +32,12 @@
#define DES_SCALE 1
#define DES_EXTB 1
#define DES_COPY 0
-#define DES_BS_ASM 1
-#define DES_BS 2
-#define DES_BS_VECTOR 2
+#define DES_BS_ASM 0
+#define DES_BS 1
+#define DES_BS_VECTOR 4
+#define DES_BS_VECTOR34
#define DES_BS_EXPAND 1
-#define DES_BS_ALGORITHM_NAME "128/128 BS SSE2-16"
+// #define DES_BS_ALGORITHM_NAME "128/128 BS SSE2-16"
#define MD5_ASM 0
#define MD5_X2 1
Powered by blists - more mailing lists
Powered by Openwall GNU/*/Linux -
Powered by OpenVZ