Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [day] [month] [year] [list]
Date: Mon, 08 Aug 2016 01:47:11 -0700
From: <writeonce@...ipix.org>
To: john-dev@...ts.openwall.com
Subject: RE: x86-64.S Win64 support

> On Sun, Aug 07, 2016 at 05:47:57PM -0700, writeonce@...ipix.org wrote:
> > As part of alpha release preparation we've added JtR to midipix_build.sh
> > for inclusion in the release tarball. After reading this thread I went
> > on to test the binary, then realized that x86-64.S was still an open
> > issue due to the different calling conventions. As all global functions
> > in x86-64.S take two arguments at the most, I went ahead and patched the
> > relevant spots using #ifdef __PE__ and a push-mov-pop wrapping method
> > (note that in the WIN64 ABI, both %rdi and %rsi are nonvolatile
> > registers). After applying the attached patch, all tests passed.
> > Attached log is from a Dell Latitude E5450, JtR built with -g3 -O2, musl
> > libc and libpsxscl built with -g3 -O0.
> 
> Thanks for the patch.
> 
> Looking at x86-64.S in jumbo, I see it also saves/restores %xmm6 through
> %xmm15 when building for Win64. Perhaps this is unnecessary in a build
> with midipix, since musl does not currently use those registers and JtR
> core does not use any other libraries. But this may be something to
> include if I apply a patch like this to the core tree. Or at least
> there should be a comment added about this piece missing. Otherwise it
> could be an unpleasant surprise to someone when things are neither fully
> working nor fully broken in a non-midipix Win64 build of the core tree.
> 
> Alexander

Good point, and after looking at the jumbo patch I thought it might be
best to have the same prologue/epilogue (and fewer #ifdefs) in both
projects. Attached is an updated patch with a reference comment.


diff -ru a/src/DES_bs.c b/src/DES_bs.c
--- a/src/DES_bs.c	2012-07-14 09:36:44.000000000 -0400
+++ b/src/DES_bs.c	2016-08-07 15:08:48.011369100 -0400
@@ -52,7 +52,7 @@
 };
 
 #if DES_BS_ASM
-extern void DES_bs_init_asm(void);
+extern void DES_INTERNAL_CALL DES_bs_init_asm(void);
 #endif
 
 void DES_bs_init(int LM, int cpt)
diff -ru a/src/DES_bs.h b/src/DES_bs.h
--- a/src/DES_bs.h	2013-05-06 02:19:49.000000000 -0400
+++ b/src/DES_bs.h	2016-08-07 17:57:00.097603400 -0400
@@ -35,6 +35,16 @@
 #define DES_bs_vector			ARCH_WORD
 #endif
 
+#ifndef HAVE_INTERNAL_ATTR
+#define HAVE_INTERNAL_ATTR 0
+#endif
+
+#if     HAVE_INTERNAL_ATTR
+#define DES_INTERNAL_CALL  __attribute__((visibility("internal")))
+#else
+#define DES_INTERNAL_CALL
+#endif
+
 /*
  * All bitslice DES parameters combined into one struct for more efficient
  * cache usage. Don't re-order unless you know what you're doing, as there
@@ -112,7 +122,7 @@
 #else
 #define DES_bs_mt			0
 #define DES_bs_cpt			1
-extern DES_bs_combined DES_bs_all;
+extern  DES_INTERNAL_CALL DES_bs_combined DES_bs_all;
 #define for_each_t(n)
 #define init_t()
 #endif
@@ -120,7 +130,7 @@
 /*
  * Initializes the internal structures.
  */
-extern void DES_bs_init(int LM, int cpt);
+extern void DES_INTERNAL_CALL DES_bs_init(int LM, int cpt);
 
 /*
  * Sets a salt for DES_bs_crypt().
@@ -139,17 +149,17 @@
 /*
  * Almost generic implementation: 24-bit salts, variable iteration count.
  */
-extern void DES_bs_crypt(int count, int keys_count);
+extern void DES_INTERNAL_CALL DES_bs_crypt(int count, int keys_count);
 
 /*
  * A simplified special-case implementation: 12-bit salts, 25 iterations.
  */
-extern void DES_bs_crypt_25(int keys_count);
+extern void DES_INTERNAL_CALL DES_bs_crypt_25(int keys_count);
 
 /*
  * Another special-case version: a non-zero IV, no salts, no iterations.
  */
-extern int DES_bs_crypt_LM(int *keys_count, struct db_salt *salt);
+extern int DES_INTERNAL_CALL DES_bs_crypt_LM(int *keys_count, struct db_salt *salt);
 
 /*
  * Converts an ASCII ciphertext to binary to be used with one of the
diff -ru a/src/john.c b/src/john.c
--- a/src/john.c	2013-05-29 19:27:25.000000000 -0400
+++ b/src/john.c	2016-08-07 17:03:35.227295300 -0400
@@ -61,8 +61,12 @@
 #include "batch.h"
 
 #if CPU_DETECT
+#if defined(HAVE_INTERNAL_ATTR) && HAVE_INTERNAL_ATTR
+extern int __attribute__((visibility("internal"))) CPU_detect(void);
+#else
 extern int CPU_detect(void);
 #endif
+#endif
 
 extern struct fmt_main fmt_DES, fmt_BSDI, fmt_MD5, fmt_BF;
 extern struct fmt_main fmt_AFS, fmt_LM;
diff -ru a/src/Makefile b/src/Makefile
--- a/src/Makefile	2013-05-29 19:21:25.000000000 -0400
+++ b/src/Makefile	2016-08-07 20:17:30.889817100 -0400
@@ -104,6 +104,8 @@
 #	@echo "linux-ppc64-altivec      Linux, PowerPC 64-bit w/AltiVec"
 	@echo "linux-ppc64              Linux, PowerPC 64-bit"
 	@echo "linux-ia64               Linux, IA-64"
+	@echo "midipix-x86-64-avx       Midipix, x86-64 with AVX (2011+ Intel CPUs)"
+	@echo "midipix-x86-64           Midipix, x86-64 with SSE2 (most common)"
 	@echo "freebsd-x86-64           FreeBSD, x86-64 with SSE2 (best)"
 	@echo "freebsd-x86-sse2         FreeBSD, x86 with SSE2 (best if 32-bit)"
 	@echo "freebsd-x86-mmx          FreeBSD, x86 with MMX"
@@ -306,6 +308,22 @@
 		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
 		LDFLAGS="$(LDFLAGS) -lcrypt"
 
+midipix-x86-64-avx:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o" \
+		CFLAGS_MAIN="$(CFLAGS) -DJOHN_AVX -DHAVE_CRYPT" \
+		CFLAGS="$(CFLAGS) -mavx -DHAVE_CRYPT -DHAVE_INTERNAL_ATTR" \
+		ASFLAGS="$(ASFLAGS) -mavx" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
+midipix-x86-64:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT -DHAVE_INTERNAL_ATTR" \
+		LDFLAGS="$(LDFLAGS) -lcrypt"
+
 freebsd-x86-64:
 	$(LN) x86-64.h arch.h
 	$(MAKE) $(PROJ) \
diff -ru a/src/x86-64.S b/src/x86-64.S
--- a/src/x86-64.S	2012-07-21 09:08:57.000000000 -0400
+++ b/src/x86-64.S	2016-08-08 03:57:15.695577800 -0400
@@ -963,9 +963,72 @@
 
 .text
 
+# the following WIN64 PROLOGUE/EPILOGUE were derived from john-jumbo;
+# sufficient for all functions which expect up to two integer parameters
+# in RCX and RDX.
+
+#if defined (__PE__) || defined (_WIN64) || defined (__CYGWIN64__)
+/*
+ * MS use a different x64 calling convention than everyone else:
+ * Arguments: RCX, RDX, R8, R9 then stack right-to-left.
+ * Volatile: RAX, RCX, RDX, R8, R9, R10, R11, XMM0:XMM5
+ * Non-volatile: RBX, RBP, RSI, RDI, R12:R15, XMM6:XMM15
+ * Return: RAX.
+ */
+#define ARG1				%rdi
+#define PROLOGUE \
+	subq $(8+10*16), %rsp; \
+	movapd %xmm6, 0*16(%rsp); \
+	movapd %xmm7, 1*16(%rsp); \
+	movapd %xmm8, 2*16(%rsp); \
+	movapd %xmm9, 3*16(%rsp); \
+	movapd %xmm10, 4*16(%rsp); \
+	movapd %xmm11, 5*16(%rsp); \
+	movapd %xmm12, 6*16(%rsp); \
+	movapd %xmm13, 7*16(%rsp); \
+	movapd %xmm14, 8*16(%rsp); \
+	movapd %xmm15, 9*16(%rsp); \
+	push %rdi; \
+	push %rsi; \
+	movq %rcx, %rdi; \
+	movq %rdx, %rsi
+
+#define EPILOGUE \
+	pop %rsi; \
+	pop %rdi; \
+	movapd 0*16(%rsp), %xmm6; \
+	movapd 1*16(%rsp), %xmm7; \
+	movapd 2*16(%rsp), %xmm8; \
+	movapd 3*16(%rsp), %xmm9; \
+	movapd 4*16(%rsp), %xmm10; \
+	movapd 5*16(%rsp), %xmm11; \
+	movapd 6*16(%rsp), %xmm12; \
+	movapd 7*16(%rsp), %xmm13; \
+	movapd 8*16(%rsp), %xmm14; \
+	movapd 9*16(%rsp), %xmm15; \
+	addq $(8+10*16), %rsp
+#else
+/*
+ * System V AMD64 ABI (followed by everybody else including linux-X32):
+ * Arguments: RDI, RSI, RDX, RCX, R8, R9 then stack right-to-left.
+ * Volatile: RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, XMM0:XMM15
+ * Non-volatile: RBX, RBP, R12:R15
+ * Return: RAX.
+ */
+#define ARG1				%rdi
+#define PROLOGUE
+#define EPILOGUE
+#endif
+
+#ifdef __PE__
+.def DES_bs_init_asm; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_init_asm
+
 DES_bs_init_asm:
+	PROLOGUE
 	pcmpeqd %xmm0,%xmm0
 	movdqa %xmm0,pnot
 	paddb %xmm0,%xmm0
@@ -985,19 +1048,26 @@
 	movdqa %xmm0,mask40
 	SHLB1(%xmm0)
 	movdqa %xmm0,mask80
+	EPILOGUE
 	ret
 
 #define iterations			%edi
 #define rounds_and_swapped		%eax
 
+#ifdef __PE__
+.def DES_bs_crypt; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_crypt
 DES_bs_crypt:
+	PROLOGUE
 	cmpl $0,DES_bs_all_keys_changed(%rip)
 	jz DES_bs_crypt_body
 	pushq %rdi
 	call DES_bs_finalize_keys
 	popq %rdi
+
 DES_bs_crypt_body:
 	pxor zero,zero
 	leaq DES_bs_all_KS_v(%rip),k_ptr
@@ -1046,17 +1116,25 @@
 	movl $0x108,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_swap
+	EPILOGUE
 	ret
+
 DES_bs_crypt_next:
 	subq $nvec(0x300-48),k_ptr
 	movl $8,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_start
+	EPILOGUE
 	ret
 
+#ifdef __PE__
+.def DES_bs_crypt_25; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_crypt_25
 DES_bs_crypt_25:
+	PROLOGUE
 	cmpl $0,DES_bs_all_keys_changed(%rip)
 	jnz DES_bs_finalize_keys_25
 DES_bs_crypt_25_body:
@@ -1108,7 +1186,9 @@
 	movl $0x108,rounds_and_swapped
 	subl $1,iterations
 	jnz DES_bs_crypt_25_swap
+	EPILOGUE
 	ret
+
 DES_bs_crypt_25_next:
 	subq $nvec(0x300-48),k_ptr
 	movl $8,rounds_and_swapped
@@ -1174,9 +1254,14 @@
 
 #define rounds				%eax
 
+#ifdef __PE__
+.def DES_bs_crypt_LM; .scl 2; .type 32; .endef
+#endif
+
 DO_ALIGN(6)
 .globl DES_bs_crypt_LM
 DES_bs_crypt_LM:
+	PROLOGUE
 	movl (%rdi),%r8d
 	movdqa mask01,%xmm7
 	movdqa mask02,%xmm8
@@ -1333,8 +1418,8 @@
 	subl $1,rounds
 	jnz DES_bs_crypt_LM_loop
 	xchgq %r8,%rax
+	EPILOGUE
 	ret
-
 #endif
 
 #if defined(CPU_REQ_AVX) || defined(CPU_REQ_XOP)
@@ -1350,8 +1435,14 @@
 #ifdef UNDERSCORES
 #define CPU_detect _CPU_detect
 #endif
+
+#ifdef __PE__
+.def CPU_detect; .scl 2; .type 32; .endef
+#endif
+
 .globl CPU_detect
 CPU_detect:
+	PROLOGUE
 	pushq %rbx
 	movl $1,%eax
 	cpuid
@@ -1376,10 +1467,13 @@
 #endif
 	movl $1,%eax
 	popq %rbx
+	EPILOGUE
 	ret
+
 CPU_detect_fail:
 	xorl %eax,%eax
 	popq %rbx
+	EPILOGUE
 	ret
 #endif
 

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ