Follow us on Twitter or via RSS feeds with tweets or complete announcement texts or excerpts
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri, 25 Feb 2011 11:26:46 +0300
From: Solar Designer <solar@...nwall.com>
To: john-users@...ts.openwall.com
Subject: Re: bitslice DES on AVX

Hi,

Here's another update.  This time, I've added make targets for 32-bit
Linux systems (linux-x86-avx and linux-x86-xop).  Like before, they're
tested under Intel's Software Development Emulator - the AVX one works
(approx. 40% slower than it does with linux-x86-64-avx under 64-bit
Linux on the same machine), the XOP one crashes on vpcmov with "Illegal
instruction", as expected.

Make targets for non-Linux systems may be added easily, but I did not
bother doing it yet.

The new patch (john-1.7.6-avx-3.diff) is attached to this message and
uploaded to the wiki:

http://openwall.info/wiki/john/patches

A more detailed announcement of the previous revision (most of what I
wrote there still applies):

http://www.openwall.com/lists/john-users/2011/02/23/10

If you try this out on a real CPU or on an XOP emulator, please post your
results to john-users.  Seriously, with 1000+ subscribers we ought to
have someone with an AVX-capable CPU already.  These have been
available for purchase since early January.  Anyone?

A week ago, Phoronix published benchmarks of JtR 1.7.3.1 on a Core i7
2820QM mobile "Sandy Bridge" CPU clocked at 2.3 GHz (perhaps with Turbo
Boost to a higher frequency).  According to those, JtR does over
3.8M c/s on a single core with its pre-generated SSE2 assembly code
(which is why there's almost no difference across C compilers):

http://www.phoronix.com/scan.php?page=article&item=intel_snb_llvm&num=2

This is quite impressive.  I wonder how a CPU like this will perform
when we go from SSE2 instructions (2-op 128-bit) to AVX (3-op 256-bit
or 128-bit).

Thanks,

Alexander

diff -urp john-1.7.6.orig/src/DES_bs_b.c john-1.7.6/src/DES_bs_b.c
--- john-1.7.6.orig/src/DES_bs_b.c	2010-06-13 18:17:29 +0000
+++ john-1.7.6/src/DES_bs_b.c	2011-02-23 07:45:53 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 1996-2001,2003,2010 by Solar Designer
+ * Copyright (c) 1996-2001,2003,2010,2011 by Solar Designer
  */
 
 #include "arch.h"
@@ -127,14 +127,56 @@ typedef struct {
 	(dst).f = vec_sel((a).f, (b).f, (c).f); \
 	(dst).g = vec_sel((a).g, (b).g, (c).g)
 
+#elif defined(__AVX__) && DES_BS_DEPTH == 256
+#undef DES_BS_VECTOR
+
+#include <immintrin.h>
+
+/* Not __m256i because bitwise ops are "floating-point" with AVX */
+typedef __m256 vtype;
+
+#define vst(dst, ofs, src) \
+	_mm256_store_ps((float *)((DES_bs_vector *)&(dst) + (ofs)), (src))
+
+#define vxorf(a, b) \
+	_mm256_xor_ps((a), (b))
+
+#define vnot(dst, a) \
+	(dst) = _mm256_xor_ps((a), *(vtype *)&DES_bs_all.ones)
+#define vand(dst, a, b) \
+	(dst) = _mm256_and_ps((a), (b))
+#define vor(dst, a, b) \
+	(dst) = _mm256_or_ps((a), (b))
+#define vandn(dst, a, b) \
+	(dst) = _mm256_andnot_ps((b), (a))
+#define vxorn(dst, a, b) \
+	(dst) = _mm256_xor_ps(_mm256_xor_ps((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+
+#ifdef __XOP__
+#define vnor(dst, a, b) \
+	(dst) = _mm256_xor_ps(_mm256_or_ps((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+/* This could be _mm256_cmov_ps(), but it does not exist (yet?) */
+#define vsel(dst, a, b, c) \
+	(dst) = __builtin_ia32_vpcmov_v8sf256((b), (a), (c))
+#endif
+
 #elif defined(__SSE2__) && DES_BS_DEPTH == 128
 #undef DES_BS_VECTOR
 
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(__AVX__)
 #warning Notice: with recent versions of gcc, we are currently using SSE2 intrinsics instead of the supplied SSE2 assembly code.  This choice is made in the x86-*.h file.
 #endif
 
+#ifdef __AVX__
+#include <immintrin.h>
+#ifdef __XOP__
+#include <x86intrin.h>
+#endif
+#else
 #include <emmintrin.h>
+#endif
 
 typedef __m128i vtype;
 
@@ -156,6 +198,14 @@ typedef __m128i vtype;
 	(dst) = _mm_xor_si128(_mm_xor_si128((a), (b)), \
 	    *(vtype *)&DES_bs_all.ones)
 
+#ifdef __XOP__
+#define vnor(dst, a, b) \
+	(dst) = _mm_xor_si128(_mm_or_si128((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+#define vsel(dst, a, b, c) \
+	(dst) = _mm_cmov_si128((b), (a), (c))
+#endif
+
 #elif defined(__SSE2__) && defined(__MMX__) && DES_BS_DEPTH == 192 && \
     !defined(DES_BS_NO_MMX)
 #undef DES_BS_VECTOR
diff -urp john-1.7.6.orig/src/Makefile john-1.7.6/src/Makefile
--- john-1.7.6.orig/src/Makefile	2010-06-13 21:12:37 +0000
+++ john-1.7.6/src/Makefile	2011-02-25 07:19:33 +0000
@@ -1,6 +1,6 @@
 #
 # This file is part of John the Ripper password cracker,
-# Copyright (c) 1996-2010 by Solar Designer
+# Copyright (c) 1996-2011 by Solar Designer
 #
 
 CC = gcc
@@ -73,12 +73,16 @@ default:
 	@echo "To build John the Ripper, type:"
 	@echo "	make clean SYSTEM"
 	@echo "where SYSTEM can be one of the following:"
-	@echo "linux-x86-64             Linux, x86-64 with SSE2 (best)"
+	@echo "linux-x86-64             Linux, x86-64 with SSE2 (best tested)"
+	@echo "linux-x86-64-avx         Linux, x86-64 with AVX (experimental)"
+	@echo "linux-x86-64-xop         Linux, x86-64 with AVX and XOP (experimental)"
 #	@echo "linux-x86-64-32-sse2     Linux, x86-64, 32-bit with SSE2"
 #	@echo "linux-x86-64-32-mmx      Linux, x86-64, 32-bit with MMX"
-	@echo "linux-x86-sse2           Linux, x86 with SSE2 (best if 32-bit)"
-	@echo "linux-x86-mmx            Linux, x86 with MMX"
-	@echo "linux-x86-any            Linux, x86"
+	@echo "linux-x86-sse2           Linux, x86 32-bit with SSE2 (best tested if 32-bit)"
+	@echo "linux-x86-mmx            Linux, x86 32-bit with MMX (for old computers)"
+	@echo "linux-x86-any            Linux, x86 32-bit (for truly ancient computers)"
+	@echo "linux-x86-avx            Linux, x86 32-bit with AVX (experimental)"
+	@echo "linux-x86-xop            Linux, x86 32-bit with AVX and XOP (experimental)"
 	@echo "linux-alpha              Linux, Alpha"
 	@echo "linux-sparc              Linux, SPARC 32-bit"
 	@echo "linux-ppc32-altivec      Linux, PowerPC w/AltiVec (best)"
@@ -150,6 +154,20 @@ linux-x86-64:
 		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
 		LDFLAGS="$(LDFLAGS) -lcrypt"
 
+linux-x86-64-avx:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o" \
+		CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
+linux-x86-64-xop:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o" \
+		CFLAGS="$(CFLAGS) -mxop -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
 linux-x86-64-32-sse2:
 	$(LN) x86-sse.h arch.h
 	$(MAKE) $(PROJ) \
@@ -187,6 +205,20 @@ linux-x86-any:
 		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
 		LDFLAGS="$(LDFLAGS) -lcrypt"
 
+linux-x86-avx:
+	$(LN) x86-sse.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86.o" \
+		CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
+linux-x86-xop:
+	$(LN) x86-sse.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86.o" \
+		CFLAGS="$(CFLAGS) -mxop -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
 linux-alpha:
 	$(LN) alpha.h arch.h
 	$(MAKE) $(PROJ) \
diff -urp john-1.7.6.orig/src/params.h john-1.7.6/src/params.h
--- john-1.7.6.orig/src/params.h	2010-06-14 02:38:55 +0000
+++ john-1.7.6/src/params.h	2011-02-25 07:32:15 +0000
@@ -15,7 +15,7 @@
 /*
  * John's version number.
  */
-#define JOHN_VERSION			"1.7.6"
+#define JOHN_VERSION			"1.7.6-avx-3"
 
 /*
  * Notes to packagers of John for *BSD "ports", Linux distributions, etc.:
Only in john-1.7.6/src: sde-log.txt
diff -urp john-1.7.6.orig/src/x86-64.h john-1.7.6/src/x86-64.h
--- john-1.7.6.orig/src/x86-64.h	2010-06-13 00:33:38 +0000
+++ john-1.7.6/src/x86-64.h	2011-02-25 07:13:38 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 2003,2006,2008,2010 by Solar Designer
+ * Copyright (c) 2003,2006,2008,2010,2011 by Solar Designer
  */
 
 /*
@@ -32,7 +32,30 @@
 #define DES_SCALE			1
 #define DES_EXTB			1
 #define DES_COPY			0
-#if defined(__SSE2__) && \
+#define DES_BS				1
+#ifdef __AVX__
+#define DES_BS_ASM			0
+#if 1
+#define DES_BS_VECTOR			4
+#if defined(__XOP__) && defined(__GNUC__)
+/* Require gcc for 256-bit XOP because of __builtin_ia32_vpcmov_v8sf256() */
+#undef DES_BS
+#define DES_BS				3
+#define DES_BS_ALGORITHM_NAME		"256/256 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME		"256/256 BS AVX"
+#endif
+#else
+#define DES_BS_VECTOR			2
+#ifdef __XOP__
+#undef DES_BS
+#define DES_BS				3
+#define DES_BS_ALGORITHM_NAME		"128/256 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME		"128/256 BS AVX"
+#endif
+#endif
+#elif defined(__SSE2__) && \
     ((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4)
 #define DES_BS_ASM			0
 #if 1
@@ -56,7 +79,6 @@
 #define DES_BS_VECTOR			2
 #define DES_BS_ALGORITHM_NAME		"128/128 BS SSE2-16"
 #endif
-#define DES_BS				1
 #define DES_BS_EXPAND			1
 
 #define MD5_ASM				0
diff -urp john-1.7.6.orig/src/x86-sse.h john-1.7.6/src/x86-sse.h
--- john-1.7.6.orig/src/x86-sse.h	2010-06-09 20:03:53 +0000
+++ john-1.7.6/src/x86-sse.h	2011-02-25 07:27:32 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 1996-2002,2005,2006,2008,2010 by Solar Designer
+ * Copyright (c) 1996-2002,2005,2006,2008,2010,2011 by Solar Designer
  */
 
 /*
@@ -45,7 +45,30 @@
 #define DES_EXTB			0
 #define DES_COPY			1
 #define DES_STD_ALGORITHM_NAME		"48/64 4K MMX"
-#if defined(__SSE2__) && 0
+#define DES_BS				1
+#ifdef __AVX__
+#define DES_BS_ASM			0
+#if 1
+#define DES_BS_VECTOR			8
+#if defined(__XOP__) && defined(__GNUC__)
+/* Require gcc for 256-bit XOP because of __builtin_ia32_vpcmov_v8sf256() */
+#undef DES_BS
+#define DES_BS				3
+#define DES_BS_ALGORITHM_NAME		"256/256 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME		"256/256 BS AVX"
+#endif
+#else
+#define DES_BS_VECTOR			4
+#ifdef __XOP__
+#undef DES_BS
+#define DES_BS				3
+#define DES_BS_ALGORITHM_NAME		"128/256 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME		"128/256 BS AVX"
+#endif
+#endif
+#elif defined(__SSE2__) && 0
 #define DES_BS_ASM			0
 #if 1
 #define DES_BS_VECTOR			4
@@ -68,7 +91,6 @@
 #define DES_BS_VECTOR			4
 #define DES_BS_ALGORITHM_NAME		"128/128 BS SSE2"
 #endif
-#define DES_BS				1
 #define DES_BS_EXPAND			1
 
 #define MD5_ASM				1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux - Powered by OpenVZ