Add AVX and XOP builds of the bitslice DES code to John the Ripper 1.7.6.

DES_bs_b.c: new 256-bit vtype (__m256) selected when __AVX__ is defined and
  DES_BS_DEPTH == 256, with vst/vxorf/vnot/vand/vor/vandn/vxorn macros built
  on the _mm256_*_ps intrinsics.  vnor and vsel are added when __XOP__ is
  defined, for both the 256-bit path and the existing 128-bit SSE2 path.
Makefile: new linux-x86-64-avx (-mavx) and linux-x86-64-xop (-mxop) targets.
params.h: JOHN_VERSION becomes "1.7.6-avx-2".
x86-64.h: choose DES_BS_VECTOR, DES_BS and DES_BS_ALGORITHM_NAME for AVX/XOP
  ahead of the existing SSE2 selection.

Note: line breaks and leading indentation in this copy were reconstructed;
other whitespace is as found.  If context lines do not match your tree,
apply with "patch -p1 -l".

diff -urp john-1.7.6.orig/src/DES_bs_b.c john-1.7.6-avx/src/DES_bs_b.c
--- john-1.7.6.orig/src/DES_bs_b.c 2010-06-13 18:17:29 +0000
+++ john-1.7.6-avx/src/DES_bs_b.c 2011-02-23 07:45:53 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 1996-2001,2003,2010 by Solar Designer
+ * Copyright (c) 1996-2001,2003,2010,2011 by Solar Designer
  */
 
 #include "arch.h"
@@ -127,14 +127,56 @@ typedef struct {
 	(dst).f = vec_sel((a).f, (b).f, (c).f); \
 	(dst).g = vec_sel((a).g, (b).g, (c).g)
 
+#elif defined(__AVX__) && DES_BS_DEPTH == 256
+#undef DES_BS_VECTOR
+
+#include <immintrin.h>
+
+/* Not __m256i because bitwise ops are "floating-point" with AVX */
+typedef __m256 vtype;
+
+#define vst(dst, ofs, src) \
+	_mm256_store_ps((float *)((DES_bs_vector *)&(dst) + (ofs)), (src))
+
+#define vxorf(a, b) \
+	_mm256_xor_ps((a), (b))
+
+#define vnot(dst, a) \
+	(dst) = _mm256_xor_ps((a), *(vtype *)&DES_bs_all.ones)
+#define vand(dst, a, b) \
+	(dst) = _mm256_and_ps((a), (b))
+#define vor(dst, a, b) \
+	(dst) = _mm256_or_ps((a), (b))
+#define vandn(dst, a, b) \
+	(dst) = _mm256_andnot_ps((b), (a))
+#define vxorn(dst, a, b) \
+	(dst) = _mm256_xor_ps(_mm256_xor_ps((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+
+#ifdef __XOP__
+#define vnor(dst, a, b) \
+	(dst) = _mm256_xor_ps(_mm256_or_ps((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+/* This could be _mm256_cmov_ps(), but it does not exist (yet?) */
+#define vsel(dst, a, b, c) \
+	(dst) = __builtin_ia32_vpcmov_v8sf256((b), (a), (c))
+#endif
+
 #elif defined(__SSE2__) && DES_BS_DEPTH == 128
 #undef DES_BS_VECTOR
 
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(__AVX__)
 #warning Notice: with recent versions of gcc, we are currently using SSE2 intrinsics instead of the supplied SSE2 assembly code. This choice is made in the x86-*.h file.
 #endif
 
+#ifdef __AVX__
+#include <immintrin.h>
+#ifdef __XOP__
+#include <x86intrin.h>
+#endif
+#else
 #include <emmintrin.h>
+#endif
 
 typedef __m128i vtype;
 
@@ -156,6 +198,14 @@ typedef __m128i vtype;
 	(dst) = _mm_xor_si128(_mm_xor_si128((a), (b)), \
 	    *(vtype *)&DES_bs_all.ones)
 
+#ifdef __XOP__
+#define vnor(dst, a, b) \
+	(dst) = _mm_xor_si128(_mm_or_si128((a), (b)), \
+	    *(vtype *)&DES_bs_all.ones)
+#define vsel(dst, a, b, c) \
+	(dst) = _mm_cmov_si128((b), (a), (c))
+#endif
+
 #elif defined(__SSE2__) && defined(__MMX__) && DES_BS_DEPTH == 192 && \
     !defined(DES_BS_NO_MMX)
 #undef DES_BS_VECTOR
diff -urp john-1.7.6.orig/src/Makefile john-1.7.6-avx/src/Makefile
--- john-1.7.6.orig/src/Makefile 2010-06-13 21:12:37 +0000
+++ john-1.7.6-avx/src/Makefile 2011-02-23 07:49:36 +0000
@@ -1,6 +1,6 @@
 #
 # This file is part of John the Ripper password cracker,
-# Copyright (c) 1996-2010 by Solar Designer
+# Copyright (c) 1996-2011 by Solar Designer
 #
 
 CC = gcc
@@ -73,7 +73,9 @@ default:
 	@echo "To build John the Ripper, type:"
 	@echo " make clean SYSTEM"
 	@echo "where SYSTEM can be one of the following:"
-	@echo "linux-x86-64 Linux, x86-64 with SSE2 (best)"
+	@echo "linux-x86-64 Linux, x86-64 with SSE2 (best tested)"
+	@echo "linux-x86-64-avx Linux, x86-64 with AVX (experimental)"
+	@echo "linux-x86-64-xop Linux, x86-64 with AVX and XOP (experimental)"
 #	@echo "linux-x86-64-32-sse2 Linux, x86-64, 32-bit with SSE2"
 #	@echo "linux-x86-64-32-mmx Linux, x86-64, 32-bit with MMX"
 	@echo "linux-x86-sse2 Linux, x86 with SSE2 (best if 32-bit)"
@@ -150,6 +152,20 @@ linux-x86-64:
 		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
 		LDFLAGS="$(LDFLAGS) -lcrypt"
 
+linux-x86-64-avx:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o" \
+		CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
+linux-x86-64-xop:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o" \
+		CFLAGS="$(CFLAGS) -mxop -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
 linux-x86-64-32-sse2:
 	$(LN) x86-sse.h arch.h
 	$(MAKE) $(PROJ) \
diff -urp john-1.7.6.orig/src/params.h john-1.7.6-avx/src/params.h
--- john-1.7.6.orig/src/params.h 2010-06-14 02:38:55 +0000
+++ john-1.7.6-avx/src/params.h 2011-02-23 00:59:24 +0000
@@ -15,7 +15,7 @@
 /*
  * John's version number.
  */
-#define JOHN_VERSION "1.7.6"
+#define JOHN_VERSION "1.7.6-avx-2"
 
 /*
  * Notes to packagers of John for *BSD "ports", Linux distributions, etc.:
Only in john-1.7.6-avx/src: sde-log.txt
diff -urp john-1.7.6.orig/src/x86-64.h john-1.7.6-avx/src/x86-64.h
--- john-1.7.6.orig/src/x86-64.h 2010-06-13 00:33:38 +0000
+++ john-1.7.6-avx/src/x86-64.h 2011-02-23 08:00:19 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 2003,2006,2008,2010 by Solar Designer
+ * Copyright (c) 2003,2006,2008,2010,2011 by Solar Designer
  */
 
 /*
@@ -32,7 +32,30 @@
 #define DES_SCALE 1
 #define DES_EXTB 1
 #define DES_COPY 0
-#if defined(__SSE2__) && \
+#define DES_BS 1
+#ifdef __AVX__
+#define DES_BS_ASM 0
+#if 1
+#define DES_BS_VECTOR 4
+#if defined(__XOP__) && defined(__GNUC__)
+/* Require gcc for 256-bit XOP because of __builtin_ia32_vpcmov_v8sf256() */
+#undef DES_BS
+#define DES_BS 3
+#define DES_BS_ALGORITHM_NAME "256/256 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME "256/256 BS AVX"
+#endif
+#else
+#define DES_BS_VECTOR 2
+#ifdef __XOP__
+#undef DES_BS
+#define DES_BS 3
+#define DES_BS_ALGORITHM_NAME "128/128 BS XOP"
+#else
+#define DES_BS_ALGORITHM_NAME "128/128 BS AVX"
+#endif
+#endif
+#elif defined(__SSE2__) && \
 	((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4)
 #define DES_BS_ASM 0
 #if 1
@@ -56,7 +79,6 @@
 #define DES_BS_VECTOR 2
 #define DES_BS_ALGORITHM_NAME "128/128 BS SSE2-16"
 #endif
-#define DES_BS 1
 #define DES_BS_EXPAND 1
 
 #define MD5_ASM 0