Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Sat, 23 Jun 2012 05:20:53 +0400
From: Solar Designer <solar@...nwall.com>
To: john-dev@...ts.openwall.com
Subject: WPA-PSK fixes, OpenMP support

Lukas, magnum, all -

The attached patch fixes two out of bounds writes that occurred all the
time (both of them in CPU code, one of them in GPU code), prevents out
of bounds writes on over-long passwords and on missed set_key() (which
may happen during self-tests with large max_keys_per_crypt), removes the
dependency on some char arrays on the stack being int-aligned, and
finally adds OpenMP support for the CPU code.  (Of course, we'd achieve
much better speed by also using sse-intrinsics.c code for SHA-1.)

BTW, where does the length 15 limit come from?  Can/should we avoid it?

Here are some speeds.  FX-8120, one CPU core in use:

Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [32/64]... DONE
Raw:    401 c/s real, 401 c/s virtual

FX-8120, OpenMP build:

Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [32/64]... (8xOMP) DONE
Raw:    2032 c/s real, 253 c/s virtual

GTX 570 1600 MHz:

Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [CUDA]... DONE
Raw:    28444 c/s real, 28595 c/s virtual

HD 7970:

OpenCL platform 1: AMD Accelerated Parallel Processing, 2 device(s).
Using device 0: Tahiti
Max Group Work Size 256
Optimal Group work Size = 96
Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [OpenCL]... DONE
Raw:    42164 c/s real, 121720 c/s virtual

Same two GPUs, OpenMP build:

Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [CUDA]... (8xOMP) DONE
Raw:    32385 c/s real, 16541 c/s virtual

OpenCL platform 1: AMD Accelerated Parallel Processing, 2 device(s).
Using device 0: Tahiti
Max Group Work Size 256
Optimal Group work Size = 128
Benchmarking: WPA-PSK PBKDF2-HMAC-SHA-1 [OpenCL]... (8xOMP) DONE
Raw:    55138 c/s real, 41890 c/s virtual

Hmm, somehow "Optimal Group work Size" is different here.

BTW, "group work size" sounds weird.  Do we actually mean "global work
size" or "work-group size"?

Alexander

diff --git a/src/cuda_wpapsk_fmt.c b/src/cuda_wpapsk_fmt.c
index e037c11..f1ce35b 100644
--- a/src/cuda_wpapsk_fmt.c
+++ b/src/cuda_wpapsk_fmt.c
@@ -12,9 +12,8 @@
 #include "cuda_wpapsk.h"
 #include "cuda_common.h"
 
-
 #define FORMAT_LABEL		"wpapsk-cuda"
-#define FORMAT_NAME		"WPA-PSK"
+#define FORMAT_NAME		"WPA-PSK PBKDF2-HMAC-SHA-1"
 #define ALGORITHM_NAME		"CUDA"
 
 #define BENCHMARK_COMMENT	""
@@ -56,6 +55,17 @@ static void init(struct fmt_main *pFmt)
 	check_mem_allocation(inbuffer, outbuffer);
 	mic = (mic_t *) malloc(sizeof(mic_t) * MAX_KEYS_PER_CRYPT);
 	atexit(cleanup);
+
+/*
+ * Zeroize the lengths in case crypt_all() is called with some keys still
+ * not set.  This may happen during self-tests.
+ */
+	{
+		int i;
+		for (i = 0; i < pFmt->params.max_keys_per_crypt; i++)
+			inbuffer[i].length = 0;
+	}
+
 	///Initialize CUDA
 	cuda_init(gpu_id);
 }
diff --git a/src/opencl_wpapsk_fmt.c b/src/opencl_wpapsk_fmt.c
index 7fef317..0d8107d 100644
--- a/src/opencl_wpapsk_fmt.c
+++ b/src/opencl_wpapsk_fmt.c
@@ -15,7 +15,7 @@
 #include "wpapsk.h"
 
 #define FORMAT_LABEL		"wpapsk-opencl"
-#define FORMAT_NAME		"WPA-PSK"
+#define FORMAT_NAME		"WPA-PSK PBKDF2-HMAC-SHA-1"
 #define ALGORITHM_NAME		"OpenCL"
 
 #define BENCHMARK_COMMENT	""
@@ -70,6 +70,16 @@ static void init(struct fmt_main *pFmt)
 	    (wpapsk_hash *) malloc(sizeof(wpapsk_hash) * MAX_KEYS_PER_CRYPT);
 	mic = (mic_t *) malloc(sizeof(mic_t) * MAX_KEYS_PER_CRYPT);
 
+/*
+ * Zeroize the lengths in case crypt_all() is called with some keys still
+ * not set.  This may happen during self-tests.
+ */
+	{
+		int i;
+		for (i = 0; i < pFmt->params.max_keys_per_crypt; i++)
+			inbuffer[i].length = 0;
+	}
+
 	//listOpenCLdevices();
 	opencl_init("$JOHN/wpapsk_kernel.cl", gpu_id, platform_id);
 	/// Alocate memory
diff --git a/src/wpapsk.h b/src/wpapsk.h
index 8ac588c..591c6cd 100644
--- a/src/wpapsk.h
+++ b/src/wpapsk.h
@@ -15,7 +15,7 @@
 #define HCCAP_SIZE		392
 #define uint8_t			unsigned char
 #define uint16_t		unsigned short
-#define uint32_t		unsigned int
+#define uint32_t		ARCH_WORD_32
 
 #define BINARY_SIZE		sizeof(mic_t)
 #define PLAINTEXT_LENGTH	15
@@ -174,6 +174,8 @@ static void set_salt(void *salt)
 static void set_key(char *key, int index)
 {
 	uint8_t length = strlen(key);
+	if (length > PLAINTEXT_LENGTH)
+		length = PLAINTEXT_LENGTH;
 	inbuffer[index].length = length;
 	memcpy(inbuffer[index].v, key, length);
 }
@@ -233,9 +235,11 @@ static void wpapsk_postprocess(int keys)
 #endif
 		for (i = 0; i < keys; i++) {
 			uint32_t prf[20];
+			unsigned char keymic[20];
 			prf_512(outbuffer[i].v, data, prf);
 			HMAC(EVP_sha1(), prf, 16, hccap.eapol,
-			    hccap.eapol_size, mic[i].keymic, NULL);
+			    hccap.eapol_size, keymic, NULL);
+			memcpy(mic[i].keymic, keymic, 16);
 		}
 	}
 }
@@ -288,7 +292,7 @@ static int get_hash_0(int index)
 #ifdef WPAPSK_DEBUG
 	int i;
 	puts("get_hash");
-	uint32_t *b = mic[index].keymic;
+	uint32_t *b = (uint32_t *)mic[index].keymic;
 	for (i = 0; i < 4; i++)
 		printf("%08x ", b[i]);
 	puts("");
@@ -360,4 +364,4 @@ static int cmp_exact(char *source, int count)
 	return 1;
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/wpapsk_fmt.c b/src/wpapsk_fmt.c
index 16532a4..fa11aa6 100644
--- a/src/wpapsk_fmt.c
+++ b/src/wpapsk_fmt.c
@@ -11,19 +11,20 @@
 #include "formats.h"
 #include "common.h"
 #include "misc.h"
+//#define WPAPSK_DEBUG
 #include "wpapsk.h"
 #include <openssl/hmac.h>
 #include <openssl/sha.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
 
 #define FORMAT_LABEL		"wpapsk"
-#define FORMAT_NAME		FORMAT_LABEL
-#define ALGORITHM_NAME		"OpenSSL"
-
-#define	KEYS_PER_CRYPT		1
-#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
-#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define FORMAT_NAME		"WPA-PSK PBKDF2-HMAC-SHA-1"
+#define ALGORITHM_NAME		"32/" ARCH_BITS_STR
 
-//#define WPAPSK_DEBUG
+#define MIN_KEYS_PER_CRYPT	1
+#define MAX_KEYS_PER_CRYPT	1
 
 extern wpapsk_password *inbuffer;
 extern wpapsk_hash *outbuffer;
@@ -38,95 +39,117 @@ static struct fmt_tests tests[] = {
 
 static void init(struct fmt_main *pFmt)
 {
+#ifdef _OPENMP
+	int omp_t = omp_get_max_threads();
+	pFmt->params.min_keys_per_crypt *= omp_t;
+	pFmt->params.max_keys_per_crypt *= omp_t;
+#endif
+
 	assert(sizeof(hccap_t) == HCCAP_SIZE);
-	inbuffer =
-	    (wpapsk_password *) malloc(sizeof(wpapsk_password) *
-	    MAX_KEYS_PER_CRYPT);
-	outbuffer =
-	    (wpapsk_hash *) malloc(sizeof(wpapsk_hash) * MAX_KEYS_PER_CRYPT);
-	mic = (mic_t *) malloc(sizeof(mic_t) * MAX_KEYS_PER_CRYPT);
-	if (inbuffer == NULL || outbuffer == NULL || mic == NULL) {
-		fprintf(stderr, "Memory alocation error\n");
-		exit(1);
+
+	inbuffer = mem_alloc(sizeof(*inbuffer) *
+	    pFmt->params.max_keys_per_crypt);
+	outbuffer = mem_alloc(sizeof(*outbuffer) *
+	    pFmt->params.max_keys_per_crypt);
+	mic = mem_alloc(sizeof(*mic) *
+	    pFmt->params.max_keys_per_crypt);
+
+/*
+ * Zeroize the lengths in case crypt_all() is called with some keys still
+ * not set.  This may happen during self-tests.
+ */
+	{
+		int i;
+		for (i = 0; i < pFmt->params.max_keys_per_crypt; i++)
+			inbuffer[i].length = 0;
 	}
 }
 
-static void wpapsk_cpu(wpapsk_password * in, wpapsk_hash * out,
-    wpapsk_salt * salt)
+static MAYBE_INLINE void wpapsk_cpu(int count,
+    wpapsk_password * in, wpapsk_hash * out, wpapsk_salt * salt)
 {
-	int i, j, k;
-	unsigned char essid[32 + 4];
-	unsigned char buffer[64];
-	memset(essid, 0, 32 + 4);
-	memcpy(essid, salt->salt, salt->length);
+	int j;
 	int slen = salt->length + 4;
 
-	for (j = 0; j < KEYS_PER_CRYPT; j++) {
+#ifdef _OPENMP
+#pragma omp parallel for default(none) private(j) shared(count, slen, salt, in, out)
+#endif
+	for (j = 0; j < count; j++) {
+		int i, k;
+		unsigned char essid[32 + 4];
+		union {
+			unsigned char c[64];
+			uint32_t i[16];
+		} buffer;
+		union {
+			unsigned char c[40];
+			uint32_t i[10];
+		} outbuf;
 		SHA_CTX ctx_ipad;
 		SHA_CTX ctx_opad;
 		SHA_CTX sha1_ctx;
-		memset(buffer, 0, 64);
-		memcpy(buffer, in[j].v, in[j].length);
+
+		memset(essid, 0, 32 + 4);
+		memcpy(essid, salt->salt, salt->length);
+		memset(&buffer, 0, 64);
+		memcpy(&buffer, in[j].v, in[j].length);
 
 		SHA1_Init(&ctx_ipad);
 		SHA1_Init(&ctx_opad);
 
-		for (i = 0; i < 64; i++)
-			buffer[i] ^= 0x36;
-		SHA1_Update(&ctx_ipad, buffer, 64);
+		for (i = 0; i < 16; i++)
+			buffer.i[i] ^= 0x36363636;
+		SHA1_Update(&ctx_ipad, buffer.c, 64);
 
-		for (i = 0; i < 64; i++)
-			buffer[i] ^= 0x6a;
-		SHA1_Update(&ctx_opad, buffer, 64);
+		for (i = 0; i < 16; i++)
+			buffer.i[i] ^= 0x6a6a6a6a;
+		SHA1_Update(&ctx_opad, buffer.c, 64);
 
 		essid[slen - 1] = 1;
 		HMAC(EVP_sha1(), in[j].v, in[j].length, essid, slen,
-		    (unsigned char *) out[j].v, NULL);
-		memcpy(buffer, out[j].v, 20);
+		    outbuf.c, NULL);
+		memcpy(&buffer, &outbuf, 20);
 
 		for (i = 1; i < 4096; i++) {
 			memcpy(&sha1_ctx, &ctx_ipad, sizeof(sha1_ctx));
-			SHA1_Update(&sha1_ctx, buffer, 20);
-			SHA1_Final(buffer, &sha1_ctx);
+			SHA1_Update(&sha1_ctx, buffer.c, 20);
+			SHA1_Final(buffer.c, &sha1_ctx);
 
 			memcpy(&sha1_ctx, &ctx_opad, sizeof(sha1_ctx));
-			SHA1_Update(&sha1_ctx, buffer, 20);
-			SHA1_Final(buffer, &sha1_ctx);
+			SHA1_Update(&sha1_ctx, buffer.c, 20);
+			SHA1_Final(buffer.c, &sha1_ctx);
 
-			for (k = 0; k < 5; k++) {
-				unsigned int *p = (unsigned int *) buffer;
-				out[j].v[k] ^= p[k];
-			}
+			for (k = 0; k < 5; k++)
+				outbuf.i[k] ^= buffer.i[k];
 		}
 		essid[slen - 1] = 2;
 		HMAC(EVP_sha1(), in[j].v, in[j].length, essid, slen,
-		    (unsigned char *) out[j].v + 5 * 4, NULL);
-		memcpy(buffer, out[j].v + 5, 20);
+		    &outbuf.c[20], NULL);
+		memcpy(&buffer, &outbuf.c[20], 20);
 
 		for (i = 1; i < 4096; i++) {
 			memcpy(&sha1_ctx, &ctx_ipad, sizeof(sha1_ctx));
-			SHA1_Update(&sha1_ctx, buffer, 20);
-			SHA1_Final(buffer, &sha1_ctx);
+			SHA1_Update(&sha1_ctx, buffer.c, 20);
+			SHA1_Final(buffer.c, &sha1_ctx);
 
 			memcpy(&sha1_ctx, &ctx_opad, sizeof(sha1_ctx));
-			SHA1_Update(&sha1_ctx, buffer, 20);
-			SHA1_Final(buffer, &sha1_ctx);
+			SHA1_Update(&sha1_ctx, buffer.c, 20);
+			SHA1_Final(buffer.c, &sha1_ctx);
 
-			for (k = 5; k < 8; k++) {
-				unsigned int *p = (unsigned int *) buffer;
-				outbuffer[j].v[k] ^= p[k - 5];
-			}
+			for (k = 5; k < 8; k++)
+				outbuf.i[k] ^= buffer.i[k - 5];
 		}
+
+		memcpy(&out[j], &outbuf, 32);
 	}
 }
 
 static void crypt_all(int count)
 {
-	wpapsk_cpu(inbuffer, outbuffer, &currentsalt);
+	wpapsk_cpu(count, inbuffer, outbuffer, &currentsalt);
 	wpapsk_postprocess(count);
 }
 
-
 struct fmt_main fmt_wpapsk = {
 	{
 		    FORMAT_LABEL,
@@ -139,7 +162,7 @@ struct fmt_main fmt_wpapsk = {
 		    SALT_SIZE,
 		    MIN_KEYS_PER_CRYPT,
 		    MAX_KEYS_PER_CRYPT,
-		    FMT_CASE | FMT_8_BIT,
+		    FMT_CASE | FMT_8_BIT | FMT_OMP,
 		    tests
 	},
 	{

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ