[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Wed, 23 Feb 2011 02:22:54 +0300
From: Solar Designer <solar@...nwall.com>
To: john-users@...ts.openwall.com
Subject: bitslice DES on AVX
Hi,
Does anyone in here have a CPU with AVX already (Intel Sandy Bridge) in
a machine running 64-bit Linux? If so, please try the attached patch
and report back in here.
For me, the patched tree builds cleanly with "make -j8 linux-x86-64-avx",
then correctly fails with:
Benchmarking: Traditional DES [256/256 BS AVX]... Illegal instruction
This is on a Core i7 920, which does not have AVX yet. The system is
Owl 3.0 with a user-local build of gcc 4.5.0:
http://openwall.info/wiki/internal/gcc-local-build
The version of binutils is:
solar@...:~/john/john-1.7.6-avx/src $ rpm -q binutils
binutils-2.20.51.0.11-owl3
If you get this to fail at runtime with anything other than "Illegal instruction",
please try swapping the "a" and "b" arguments to _mm256_blendv_ps().
I put them in arbitrary order without bothering to look at the
documentation and without checking this in any other way, so there's a
50% chance I got this wrong. ;-) If this does not help, please try
changing DES_BS from 3 to 1. In fact, please try this second change
either way - I am not entirely sure that 3 will produce faster code than
1, although I expect that it will (hence the default of 3). Of course,
please document any such change in your e-mail reply.
Once we get this working, a further step will be to try mixed AVX+SSE2
builds and the like.
Thanks,
Alexander
P.S. It took me a whopping 40 minutes to come up with this patch and to
write this message. The bitslice DES intrinsics "framework" introduced
in 1.7.6 is nice, making things like this trivial.
diff -urp john-1.7.6.orig/src/DES_bs_b.c john-1.7.6-avx/src/DES_bs_b.c
--- john-1.7.6.orig/src/DES_bs_b.c 2010-06-13 18:17:29 +0000
+++ john-1.7.6-avx/src/DES_bs_b.c 2011-02-22 23:04:36 +0000
@@ -1,6 +1,6 @@
/*
* This file is part of John the Ripper password cracker,
- * Copyright (c) 1996-2001,2003,2010 by Solar Designer
+ * Copyright (c) 1996-2001,2003,2010,2011 by Solar Designer
*/
#include "arch.h"
@@ -127,6 +127,44 @@ typedef struct {
(dst).f = vec_sel((a).f, (b).f, (c).f); \
(dst).g = vec_sel((a).g, (b).g, (c).g)
+#elif defined(__AVX__) && DES_BS_DEPTH == 256
+#undef DES_BS_VECTOR
+
+#include <immintrin.h>
+
+/* Not __m256i because bitwise ops are "floating-point" with AVX */
+typedef __m256 vtype;
+
+#define vst(dst, ofs, src) \
+ _mm256_store_ps((float *)((DES_bs_vector *)&(dst) + (ofs)), (src))
+
+#define vxorf(a, b) \
+ _mm256_xor_ps((a), (b))
+
+#define vnot(dst, a) \
+ (dst) = _mm256_xor_ps((a), *(vtype *)&DES_bs_all.ones)
+#define vand(dst, a, b) \
+ (dst) = _mm256_and_ps((a), (b))
+#define vor(dst, a, b) \
+ (dst) = _mm256_or_ps((a), (b))
+#define vandn(dst, a, b) \
+ (dst) = _mm256_andnot_ps((b), (a))
+#define vxorn(dst, a, b) \
+ (dst) = _mm256_xor_ps(_mm256_xor_ps((a), (b)), \
+ *(vtype *)&DES_bs_all.ones)
+#define vnor(dst, a, b) \
+ (dst) = _mm256_xor_ps(_mm256_or_ps((a), (b)), \
+ *(vtype *)&DES_bs_all.ones)
+/*
+ * Bitwise select: each result bit is taken from (b) where the matching bit
+ * of (c) is set and from (a) where it is clear, i.e. (a & ~c) | (b & c),
+ * with the same argument order as the vec_sel() calls in the branch above.
+ * _mm256_blendv_ps() is not usable here: it picks whole 32-bit elements
+ * based only on the top bit of each mask element, not bit by bit.
+ */
+#define vsel(dst, a, b, c) \
+ (dst) = _mm256_or_ps(_mm256_andnot_ps((c), (a)), \
+ _mm256_and_ps((c), (b)))
+
#elif defined(__SSE2__) && DES_BS_DEPTH == 128
#undef DES_BS_VECTOR
diff -urp john-1.7.6.orig/src/Makefile john-1.7.6-avx/src/Makefile
--- john-1.7.6.orig/src/Makefile 2010-06-13 21:12:37 +0000
+++ john-1.7.6-avx/src/Makefile 2011-02-22 22:55:10 +0000
@@ -1,6 +1,6 @@
#
# This file is part of John the Ripper password cracker,
-# Copyright (c) 1996-2010 by Solar Designer
+# Copyright (c) 1996-2011 by Solar Designer
#
CC = gcc
@@ -73,7 +73,8 @@ default:
@echo "To build John the Ripper, type:"
@echo " make clean SYSTEM"
@echo "where SYSTEM can be one of the following:"
- @echo "linux-x86-64 Linux, x86-64 with SSE2 (best)"
+ @echo "linux-x86-64 Linux, x86-64 with SSE2 (best tested)"
+ @echo "linux-x86-64-avx Linux, x86-64 with AVX (experimental)"
# @echo "linux-x86-64-32-sse2 Linux, x86-64, 32-bit with SSE2"
# @echo "linux-x86-64-32-mmx Linux, x86-64, 32-bit with MMX"
@echo "linux-x86-sse2 Linux, x86 with SSE2 (best if 32-bit)"
@@ -150,6 +151,13 @@ linux-x86-64:
CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
LDFLAGS="$(LDFLAGS) -lcrypt"
+linux-x86-64-avx:
+ $(LN) x86-64.h arch.h
+ $(MAKE) $(PROJ) \
+ JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o" \
+ CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT" \
+ LDFLAGS="$(LDFLAGS) -lcrypt"
+
linux-x86-64-32-sse2:
$(LN) x86-sse.h arch.h
$(MAKE) $(PROJ) \
diff -urp john-1.7.6.orig/src/params.h john-1.7.6-avx/src/params.h
--- john-1.7.6.orig/src/params.h 2010-06-14 02:38:55 +0000
+++ john-1.7.6-avx/src/params.h 2011-02-22 23:06:38 +0000
@@ -15,7 +15,7 @@
/*
* John's version number.
*/
-#define JOHN_VERSION "1.7.6"
+#define JOHN_VERSION "1.7.6-avx-1"
/*
* Notes to packagers of John for *BSD "ports", Linux distributions, etc.:
diff -urp john-1.7.6.orig/src/x86-64.h john-1.7.6-avx/src/x86-64.h
--- john-1.7.6.orig/src/x86-64.h 2010-06-13 00:33:38 +0000
+++ john-1.7.6-avx/src/x86-64.h 2011-02-22 22:59:52 +0000
@@ -1,6 +1,6 @@
/*
* This file is part of John the Ripper password cracker,
- * Copyright (c) 2003,2006,2008,2010 by Solar Designer
+ * Copyright (c) 2003,2006,2008,2010,2011 by Solar Designer
*/
/*
@@ -32,7 +32,14 @@
#define DES_SCALE 1
#define DES_EXTB 1
#define DES_COPY 0
-#if defined(__SSE2__) && \
+#define DES_BS 1
+#ifdef __AVX__
+#define DES_BS_ASM 0
+#define DES_BS_VECTOR 4
+#define DES_BS_ALGORITHM_NAME "256/256 BS AVX"
+#undef DES_BS
+#define DES_BS 3
+#elif defined(__SSE2__) && \
((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4)
#define DES_BS_ASM 0
#if 1
@@ -56,7 +63,6 @@
#define DES_BS_VECTOR 2
#define DES_BS_ALGORITHM_NAME "128/128 BS SSE2-16"
#endif
-#define DES_BS 1
#define DES_BS_EXPAND 1
#define MD5_ASM 0
Powered by blists - more mailing lists
Powered by Openwall GNU/*/Linux -
Powered by OpenVZ