Midipix (x86-64, PE) build support for the bitslice DES and CPU-detection
assembly.

  * DES_bs.h defines DES_INTERNAL_CALL, which expands to
    __attribute__((visibility("internal"))) when HAVE_INTERNAL_ATTR is
    non-zero and to nothing otherwise; the DES_bs_* entry points and
    DES_bs_all are declared with it.  john.c does the same for CPU_detect.
  * Makefile gains the midipix-x86-64 and midipix-x86-64-avx targets.  Both
    define HAVE_INTERNAL_ATTR; the AVX target defines it in CFLAGS_MAIN as
    well as CFLAGS so that objects built with either flag set see the same
    declarations.
  * x86-64.S, under __PE__ only: every exported routine pushes %rdi and
    %rsi, copies %rcx into %rdi and %rdx into %rsi, and leaves through
    DES_PE_pop_ret, which pops %rsi and %rdi and then returns.  A .def
    record (.scl 2, .type 32) is emitted for each exported symbol.

NOTE(review): DES_bs_crypt_LM writes %xmm7 and %xmm8, while the __PE__
prologue preserves only %rdi and %rsi.  If the target follows the Microsoft
x64 convention (where %xmm6-%xmm15 are callee-saved), callers' vector
registers are clobbered -- confirm against the Midipix ABI.

NOTE(review): DES_bs_crypt calls DES_bs_finalize_keys without reserving any
stack below the pushed registers.  If the callee is built for a convention
that expects a 32-byte home area above the return address, it may overwrite
the saved registers -- confirm.

Line breaks and leading indentation were reconstructed from the hunk
headers; apply with "patch -p1 -l" if context whitespace does not match.

diff -ru a/src/DES_bs.c b/src/DES_bs.c
--- a/src/DES_bs.c	2012-07-14 09:36:44.000000000 -0400
+++ b/src/DES_bs.c	2016-08-07 15:08:48.011369100 -0400
@@ -52,7 +52,7 @@
 };
 
 #if DES_BS_ASM
-extern void DES_bs_init_asm(void);
+extern void DES_INTERNAL_CALL DES_bs_init_asm(void);
 #endif
 
 void DES_bs_init(int LM, int cpt)
diff -ru a/src/DES_bs.h b/src/DES_bs.h
--- a/src/DES_bs.h	2013-05-06 02:19:49.000000000 -0400
+++ b/src/DES_bs.h	2016-08-07 17:57:00.097603400 -0400
@@ -35,6 +35,16 @@
 #define DES_bs_vector ARCH_WORD
 #endif
 
+#ifndef HAVE_INTERNAL_ATTR
+#define HAVE_INTERNAL_ATTR 0
+#endif
+
+#if HAVE_INTERNAL_ATTR
+#define DES_INTERNAL_CALL __attribute__((visibility("internal")))
+#else
+#define DES_INTERNAL_CALL
+#endif
+
 /*
  * All bitslice DES parameters combined into one struct for more efficient
  * cache usage. Don't re-order unless you know what you're doing, as there
@@ -112,7 +122,7 @@
 #else
 #define DES_bs_mt 0
 #define DES_bs_cpt 1
-extern DES_bs_combined DES_bs_all;
+extern DES_INTERNAL_CALL DES_bs_combined DES_bs_all;
 #define for_each_t(n)
 #define init_t()
 #endif
@@ -120,7 +130,7 @@
 /*
  * Initializes the internal structures.
  */
-extern void DES_bs_init(int LM, int cpt);
+extern void DES_INTERNAL_CALL DES_bs_init(int LM, int cpt);
 
 /*
  * Sets a salt for DES_bs_crypt().
@@ -139,17 +149,17 @@
 /*
  * Almost generic implementation: 24-bit salts, variable iteration count.
  */
-extern void DES_bs_crypt(int count, int keys_count);
+extern void DES_INTERNAL_CALL DES_bs_crypt(int count, int keys_count);
 
 /*
  * A simplified special-case implementation: 12-bit salts, 25 iterations.
  */
-extern void DES_bs_crypt_25(int keys_count);
+extern void DES_INTERNAL_CALL DES_bs_crypt_25(int keys_count);
 
 /*
  * Another special-case version: a non-zero IV, no salts, no iterations.
  */
-extern int DES_bs_crypt_LM(int *keys_count, struct db_salt *salt);
+extern int DES_INTERNAL_CALL DES_bs_crypt_LM(int *keys_count, struct db_salt *salt);
 
 /*
  * Converts an ASCII ciphertext to binary to be used with one of the
diff -ru a/src/john.c b/src/john.c
--- a/src/john.c	2013-05-29 19:27:25.000000000 -0400
+++ b/src/john.c	2016-08-07 17:03:35.227295300 -0400
@@ -61,8 +61,12 @@
 #include "batch.h"
 
 #if CPU_DETECT
+#if defined(HAVE_INTERNAL_ATTR) && HAVE_INTERNAL_ATTR
+extern int __attribute__((visibility("internal"))) CPU_detect(void);
+#else
 extern int CPU_detect(void);
 #endif
+#endif
 
 extern struct fmt_main fmt_DES, fmt_BSDI, fmt_MD5, fmt_BF;
 extern struct fmt_main fmt_AFS, fmt_LM;
diff -ru a/src/Makefile b/src/Makefile
--- a/src/Makefile	2013-05-29 19:21:25.000000000 -0400
+++ b/src/Makefile	2016-08-07 20:17:30.889817100 -0400
@@ -104,6 +104,8 @@
 # @echo "linux-ppc64-altivec Linux, PowerPC 64-bit w/AltiVec"
 	@echo "linux-ppc64 Linux, PowerPC 64-bit"
 	@echo "linux-ia64 Linux, IA-64"
+	@echo "midipix-x86-64-avx Midipix, x86-64 with AVX (2011+ Intel CPUs)"
+	@echo "midipix-x86-64 Midipix, x86-64 with SSE2 (most common)"
 	@echo "freebsd-x86-64 FreeBSD, x86-64 with SSE2 (best)"
 	@echo "freebsd-x86-sse2 FreeBSD, x86 with SSE2 (best if 32-bit)"
 	@echo "freebsd-x86-mmx FreeBSD, x86 with MMX"
@@ -306,6 +308,22 @@
 		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
 		LDFLAGS="$(LDFLAGS) -lcrypt"
 
+midipix-x86-64-avx:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o" \
+		CFLAGS_MAIN="$(CFLAGS) -DJOHN_AVX -DHAVE_CRYPT -DHAVE_INTERNAL_ATTR" \
+		CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT -DHAVE_INTERNAL_ATTR" \
+		ASFLAGS="$(ASFLAGS) -mavx" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
+midipix-x86-64:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT -DHAVE_INTERNAL_ATTR" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
 freebsd-x86-64:
 	$(LN) x86-64.h arch.h
 	$(MAKE) $(PROJ) \
diff -ru a/src/x86-64.S b/src/x86-64.S
--- a/src/x86-64.S	2012-07-21 09:08:57.000000000 -0400
+++ b/src/x86-64.S	2016-08-07 19:20:55.800629100 -0400
@@ -963,9 +963,27 @@
 
 .text
 
+#ifdef __PE__
+DES_PE_pop_ret:
+	pop %rsi
+	pop %rdi
+	ret
+#endif
+
+#ifdef __PE__
+.def DES_bs_init_asm; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_init_asm
+
 DES_bs_init_asm:
+#ifdef __PE__
+	push %rdi
+	push %rsi
+	mov %rcx,%rdi
+	mov %rdx,%rsi
+#endif
 	pcmpeqd %xmm0,%xmm0
 	movdqa %xmm0,pnot
 	paddb %xmm0,%xmm0
@@ -985,19 +1003,34 @@
 	movdqa %xmm0,mask40
 	SHLB1(%xmm0)
 	movdqa %xmm0,mask80
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
 
 #define iterations %edi
 #define rounds_and_swapped %eax
 
+#ifdef __PE__
+.def DES_bs_crypt; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_crypt
 DES_bs_crypt:
+#ifdef __PE__
+	push %rdi
+	push %rsi
+	mov %rcx,%rdi
+	mov %rdx,%rsi
+#endif
 	cmpl $0,DES_bs_all_keys_changed(%rip)
 	jz DES_bs_crypt_body
 	pushq %rdi
 	call DES_bs_finalize_keys
 	popq %rdi
+
 DES_bs_crypt_body:
 	pxor zero,zero
 	leaq DES_bs_all_KS_v(%rip),k_ptr
@@ -1046,17 +1079,36 @@
 	movl $0x108,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_swap
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
+
 DES_bs_crypt_next:
 	subq $nvec(0x300-48),k_ptr
 	movl $8,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_start
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
+
+#ifdef __PE__
+.def DES_bs_crypt_25; .scl 2; .type 32; .endef
+#endif
 
 DO_ALIGN(6)
 .globl DES_bs_crypt_25
 DES_bs_crypt_25:
+#ifdef __PE__
+	push %rdi
+	push %rsi
+	mov %rcx,%rdi
+	mov %rdx,%rsi
+#endif
 	cmpl $0,DES_bs_all_keys_changed(%rip)
 	jnz DES_bs_finalize_keys_25
 DES_bs_crypt_25_body:
@@ -1108,7 +1160,12 @@
 	movl $0x108,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_25_swap
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
+
 DES_bs_crypt_25_next:
 	subq $nvec(0x300-48),k_ptr
 	movl $8,rounds_and_swapped
@@ -1174,9 +1231,19 @@
 
 #define rounds %eax
 
+#ifdef __PE__
+.def DES_bs_crypt_LM; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_crypt_LM
 DES_bs_crypt_LM:
+#ifdef __PE__
+	push %rdi
+	push %rsi
+	mov %rcx,%rdi
+	mov %rdx,%rsi
+#endif
 	movl (%rdi),%r8d
 	movdqa mask01,%xmm7
 	movdqa mask02,%xmm8
@@ -1333,7 +1400,11 @@
 	subl $1,rounds
 	jnz DES_bs_crypt_LM_loop
 	xchgq %r8,%rax
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
 
 #endif
 
@@ -1350,8 +1421,19 @@
 #ifdef UNDERSCORES
 #define CPU_detect _CPU_detect
 #endif
+
+#ifdef __PE__
+.def CPU_detect; .scl 2; .type 32; .endef
+#endif
+
 .globl CPU_detect
 CPU_detect:
+#ifdef __PE__
+	push %rdi
+	push %rsi
+	mov %rcx,%rdi
+	mov %rdx,%rsi
+#endif
 	pushq %rbx
 	movl $1,%eax
 	cpuid
@@ -1376,12 +1458,21 @@
 #endif
 	movl $1,%eax
 	popq %rbx
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
+#endif
+
 CPU_detect_fail:
 	xorl %eax,%eax
 	popq %rbx
+#ifdef __PE__
+	jmp DES_PE_pop_ret
+#else
 	ret
 #endif
+#endif
 
 #if defined(__ELF__) && defined(__linux__)
 .section .note.GNU-stack,"",@progbits