Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 16 Sep 2015 04:28:42 +0300
From: Solar Designer <solar@...nwall.com>
To: john-dev@...ts.openwall.com
Subject: Re: Judy array

On Wed, Sep 16, 2015 at 02:29:17AM +0300, Solar Designer wrote:
> real    0m57.785s
> user    4m21.243s
> sys     0m15.629s
> 
> Oops.  Almost same speed without actual prefetching.  So maybe the
> re-arranged code is somehow faster in this case, even when I don't
> prefetch.  Or maybe the CPU does speculative execution and thus
> prefetches anyway, using those pointers that the following few loop
> iterations would use (especially as the compiler unrolls the loop).

Implemented better prefetching, patch attached.  New speed:

real    0m56.260s
user    4m9.024s
sys     0m16.240s

Best of several runs:

real    0m55.987s
user    4m7.024s
sys     0m16.082s

I think this should be committed, and I might get it into core tree
later.  Even though we'll probably replace this with Fred's code soon.
I guess what I am doing is providing another baseline for comparison.

Alexander

diff -urp bleeding-jumbo-opt3/src/cracker.c bleeding-jumbo-opt/src/cracker.c
--- bleeding-jumbo-opt3/src/cracker.c	2015-09-16 01:15:59 +0000
+++ bleeding-jumbo-opt/src/cracker.c	2015-09-16 01:14:09 +0000
@@ -1,6 +1,6 @@
 /*
  * This file is part of John the Ripper password cracker,
- * Copyright (c) 1996-2003,2006,2010-2013 by Solar Designer
+ * Copyright (c) 1996-2003,2006,2010-2013,2015 by Solar Designer
  *
  * ...with heavy changes in the jumbo patch, by magnum & JimF
  */
@@ -28,11 +28,11 @@
 #include <io.h> // open()
 #endif
 
-#ifdef CRACKER_PREFETCH
-#ifdef __SSE2__
+#define CRK_PREFETCH 64
+
+#if CRK_PREFETCH && defined(__SSE2__)
 #include <emmintrin.h>
 #endif
-#endif
 
 #include "arch.h"
 #include "misc.h"
@@ -676,7 +676,6 @@ static int crk_process_event(void)
 
 static int crk_password_loop(struct db_salt *salt)
 {
-	struct db_password *pw;
 	int count, match, index;
 
 #if !OS_TIMER
@@ -702,7 +701,7 @@ static int crk_password_loop(struct db_s
 		return 0;
 
 	if (!salt->bitmap) {
-		pw = salt->list;
+		struct db_password *pw = salt->list;
 		do {
 			if (crk_methods.cmp_all(pw->binary, match))
 			for (index = 0; index < match; index++)
@@ -718,32 +717,53 @@ static int crk_password_loop(struct db_s
 			}
 		} while ((pw = pw->next));
 	} else
-#ifndef CRACKER_PREFETCH
+#if !CRK_PREFETCH
 	for (index = 0; index < match; index++) {
 		int hash = salt->index(index);
 		if (salt->bitmap[hash / (sizeof(*salt->bitmap) * 8)] &
 		    (1U << (hash % (sizeof(*salt->bitmap) * 8)))) {
-			pw = salt->hash[hash >> PASSWORD_HASH_SHR];
+			struct db_password *pw =
+			    salt->hash[hash >> PASSWORD_HASH_SHR];
 #else
 	for (index = 0; index < match; ) {
-		int slot, ahead, target = index + 64;
-		int h[64];
-		unsigned int *b[64];
+		int slot, ahead, target = index + CRK_PREFETCH, lucky = 0;
+		struct {
+			unsigned int h;
+			union {
+				unsigned int *b;
+				struct db_password **p;
+			} u;
+		} a[CRK_PREFETCH];
 		if (target > match)
 			target = match;
 		for (slot = 0, ahead = index; ahead < target; slot++, ahead++) {
-			h[slot] = salt->index(ahead);
-			b[slot] = &salt->bitmap[h[slot] / (sizeof(*salt->bitmap) * 8)];
+			unsigned int h = salt->index(ahead);
+			unsigned int *b = &salt->bitmap[h / (sizeof(*salt->bitmap) * 8)];
+			a[slot].h = h;
+			a[slot].u.b = b;
+#ifdef __SSE2__
+			_mm_prefetch((const char *)b, _MM_HINT_NTA);
+#else
+			*(volatile unsigned int *)b;
+#endif
+		}
+		for (slot = 0, ahead = index; ahead < target; slot++, ahead++) {
+			unsigned int h = a[slot].h;
+			if (*a[slot].u.b & (1U << (h % (sizeof(*salt->bitmap) * 8)))) {
+				struct db_password **pwp = &salt->hash[h >> PASSWORD_HASH_SHR];
 #ifdef __SSE2__
-			_mm_prefetch((const char *)b[slot], _MM_HINT_NTA);
-//			_mm_prefetch((const char *)&salt->hash[h[slot] >> PASSWORD_HASH_SHR], _MM_HINT_NTA);
+				_mm_prefetch((const char *)pwp, _MM_HINT_NTA);
 #else
-			*(volatile unsigned int *)b[slot];
+				*(void * volatile *)pwp;
 #endif
+				a[slot].u.p = pwp;
+				lucky = ahead;
+			} else
+				a[slot].u.p = NULL;
 		}
-		for (slot = 0; index < target; slot++, index++)
-		if (*b[slot] & (1U << (h[slot] % (sizeof(*salt->bitmap) * 8)))) {
-			pw = salt->hash[h[slot] >> PASSWORD_HASH_SHR];
+		for (slot = 0; index <= lucky; slot++, index++)
+		if (a[slot].u.p) {
+			struct db_password *pw = *a[slot].u.p;
 #endif
 			do {
 				if (crk_methods.cmp_one(pw->binary, index))
@@ -753,6 +773,9 @@ static int crk_password_loop(struct db_s
 					return 1;
 			} while ((pw = pw->next_hash));
 		}
+#if CRK_PREFETCH
+		index = target;
+#endif
 	}
 
 	return 0;
diff -urp bleeding-jumbo-opt3/src/params.h bleeding-jumbo-opt/src/params.h
--- bleeding-jumbo-opt3/src/params.h	2015-09-15 22:50:39 +0000
+++ bleeding-jumbo-opt/src/params.h	2015-09-16 01:15:15 +0000
@@ -236,7 +236,11 @@ extern int password_hash_thresholds[PASS
  * 5 or 6 will make them the same size in bytes on systems with 32-bit or
  * 64-bit pointers, respectively.
  */
+#if ARCH_BITS >= 64
 #define PASSWORD_HASH_SHR		0
+#else
+#define PASSWORD_HASH_SHR		2
+#endif
 
 /*
  * Cracked password hash size, used while loading.
@@ -285,6 +289,16 @@ extern int password_hash_thresholds[PASS
 #define LDR_HASH_COLLISIONS_MAX		1000
 
 /*
+ * How many bitmap entries should the cracker prefetch at once.  Set this to 0
+ * to disable prefetching.
+ */
+#ifdef __SSE2__
+#define CRK_PREFETCH			64
+#else
+#define CRK_PREFETCH			0
+#endif
+
+/*
  * Maximum number of GECOS words to try in pairs.
  */
 #define SINGLE_WORDS_PAIR_MAX		6

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ